Add a GUC parameter seq_page_cost, and use that everywhere we formerly

assumed that a sequential page fetch has cost 1.0. This patch doesn't in itself change the system's behavior at all, but it opens the door to people adopting other units of measurement for EXPLAIN costs. Also, if we ever decide it's worth inventing per-tablespace access cost settings, this change provides a workable intellectual framework for that.
author: Tom Lane <tgl@sss.pgh.pa.us> 2006-06-05 02:49:58 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2006-06-05 02:49:58 +0000
commit: eed6c9ed7e243948e440ed1388591b7fa28f5827 (patch)
tree: aef8821bdd57ed83e2c1b2ce27014e45beed0934 /src/backend
parent: a837851dc060515c1f7023f74d75156a80c2c936 (diff)
download: postgresql-eed6c9ed7e243948e440ed1388591b7fa28f5827.tar.gz
postgresql-eed6c9ed7e243948e440ed1388591b7fa28f5827.zip
4 files changed, 82 insertions, 71 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 4b050d62ec6..a5074296065 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3,14 +3,19 @@
  * costsize.c
  *	  Routines to compute (and set) relation sizes and path costs
  *
- * Path costs are measured in units of disk accesses: one sequential page
- * fetch has cost 1.  All else is scaled relative to a page fetch, using
- * the scaling parameters
+ * Path costs are measured in arbitrary units established by these basic
+ * parameters:
  *
+ *	seq_page_cost		Cost of a sequential page fetch
  *	random_page_cost	Cost of a non-sequential page fetch
  *	cpu_tuple_cost		Cost of typical CPU time to process a tuple
  *	cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
- *	cpu_operator_cost	Cost of CPU time to process a typical WHERE operator
+ *	cpu_operator_cost	Cost of CPU time to execute an operator or function
+ *
+ * We expect that the kernel will typically do some amount of read-ahead
+ * optimization; this in conjunction with seek costs means that seq_page_cost
+ * is normally considerably less than random_page_cost.  (However, if the
+ * database is fully cached in RAM, it is reasonable to set them equal.)
  *
  * We also use a rough estimate "effective_cache_size" of the number of
  * disk pages in Postgres + OS-level disk cache.  (We can't simply use
@@ -49,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.155 2006/03/05 15:58:28 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.156 2006/06/05 02:49:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,12 +90,14 @@
 	 (path)->parent->rows)
 
 
-double		effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
+double		seq_page_cost = DEFAULT_SEQ_PAGE_COST;
 double		random_page_cost = DEFAULT_RANDOM_PAGE_COST;
 double		cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
 double		cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double		cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 
+double		effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
+
 Cost		disable_cost = 100000000.0;
 
 bool		enable_seqscan = true;
@@ -156,14 +163,8 @@ cost_seqscan(Path *path, PlannerInfo *root,
 
 	/*
 	 * disk costs
-	 *
-	 * The cost of reading a page sequentially is 1.0, by definition. Note
-	 * that the Unix kernel will typically do some amount of read-ahead
-	 * optimization, so that this cost is less than the true cost of reading a
-	 * page from disk.	We ignore that issue here, but must take it into
-	 * account when estimating the cost of non-sequential accesses!
 	 */
-	run_cost += baserel->pages; /* sequential fetches with cost 1.0 */
+	run_cost += seq_page_cost * baserel->pages;
 
 	/* CPU costs */
 	startup_cost += baserel->baserestrictcost.startup;
@@ -194,20 +195,21 @@ cost_seqscan(Path *path, PlannerInfo *root,
  * with the entirely ad-hoc equations (writing relsize for
  * relpages/effective_cache_size):
  *	if relsize >= 1:
- *		random_page_cost - (random_page_cost-1)/2 * (1/relsize)
+ *		random_page_cost - (random_page_cost-seq_page_cost)/2 * (1/relsize)
  *	if relsize < 1:
- *		1 + ((random_page_cost-1)/2) * relsize ** 2
- * These give the right asymptotic behavior (=> 1.0 as relpages becomes
- * small, => random_page_cost as it becomes large) and meet in the middle
- * with the estimate that the cache is about 50% effective for a relation
- * of the same size as effective_cache_size.  (XXX this is probably all
- * wrong, but I haven't been able to find any theory about how effective
+ *		seq_page_cost + ((random_page_cost-seq_page_cost)/2) * relsize ** 2
+ * These give the right asymptotic behavior (=> seq_page_cost as relpages
+ * becomes small, => random_page_cost as it becomes large) and meet in the
+ * middle with the estimate that the cache is about 50% effective for a
+ * relation of the same size as effective_cache_size.  (XXX this is probably
+ * all wrong, but I haven't been able to find any theory about how effective
  * a disk cache should be presumed to be.)
  */
 static Cost
 cost_nonsequential_access(double relpages)
 {
 	double		relsize;
+	double		random_delta;
 
 	/* don't crash on bad input data */
 	if (relpages <= 0.0 || effective_cache_size <= 0.0)
@@ -215,19 +217,17 @@ cost_nonsequential_access(double relpages)
 
 	relsize = relpages / effective_cache_size;
 
+	random_delta = (random_page_cost - seq_page_cost) * 0.5;
 	if (relsize >= 1.0)
-		return random_page_cost - (random_page_cost - 1.0) * 0.5 / relsize;
+		return random_page_cost - random_delta / relsize;
 	else
-		return 1.0 + (random_page_cost - 1.0) * 0.5 * relsize * relsize;
+		return seq_page_cost + random_delta * relsize * relsize;
 }
 
 /*
  * cost_index
  *	  Determines and returns the cost of scanning a relation using an index.
  *
- *	  NOTE: an indexscan plan node can actually represent several passes,
- *	  but here we consider the cost of just one pass.
- *
  * 'index' is the index to be used
  * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
  * 'is_injoin' is T if we are considering using the index scan as the inside
@@ -327,9 +327,9 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	 * be just sT.	What's more, these will be sequential fetches, not the
 	 * random fetches that occur in the uncorrelated case.	So, depending on
 	 * the extent of correlation, we should estimate the actual I/O cost
-	 * somewhere between s * T * 1.0 and PF * random_cost.	We currently
-	 * interpolate linearly between these two endpoints based on the
-	 * correlation squared (XXX is that appropriate?).
+	 * somewhere between s * T * seq_page_cost and PF * random_page_cost.
+	 * We currently interpolate linearly between these two endpoints based on
+	 * the correlation squared (XXX is that appropriate?).
 	 *
 	 * In any case the number of tuples fetched is Ns.
 	 *----------
@@ -346,8 +346,10 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	{
 		pages_fetched =
 			(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
-		if (pages_fetched > T)
+		if (pages_fetched >= T)
 			pages_fetched = T;
+		else
+			pages_fetched = ceil(pages_fetched);
 	}
 	else
 	{
@@ -364,6 +366,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
 			pages_fetched =
 				b + (tuples_fetched - lim) * (T - b) / T;
 		}
+		pages_fetched = ceil(pages_fetched);
 	}
 
 	/*
@@ -373,7 +376,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	 * rather than using cost_nonsequential_access, since we've already
 	 * accounted for caching effects by using the Mackert model.
 	 */
-	min_IO_cost = ceil(indexSelectivity * T);
+	min_IO_cost = ceil(indexSelectivity * T) * seq_page_cost;
 	max_IO_cost = pages_fetched * random_page_cost;
 
 	/*
@@ -461,19 +464,21 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 
 	T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
 	pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
-	if (pages_fetched > T)
+	if (pages_fetched >= T)
 		pages_fetched = T;
+	else
+		pages_fetched = ceil(pages_fetched);
 
 	/*
 	 * For small numbers of pages we should charge random_page_cost apiece,
 	 * while if nearly all the table's pages are being read, it's more
-	 * appropriate to charge 1.0 apiece.  The effect is nonlinear, too. For
-	 * lack of a better idea, interpolate like this to determine the cost per
-	 * page.
+	 * appropriate to charge seq_page_cost apiece.  The effect is nonlinear,
+	 * too. For lack of a better idea, interpolate like this to determine the
+	 * cost per page.
 	 */
 	if (pages_fetched >= 2.0)
 		cost_per_page = random_page_cost -
-			(random_page_cost - 1.0) * sqrt(pages_fetched / T);
+			(random_page_cost - seq_page_cost) * sqrt(pages_fetched / T);
 	else
 		cost_per_page = random_page_cost;
 
@@ -833,9 +838,9 @@ cost_sort(Path *path, PlannerInfo *root,
 		else
 			log_runs = 1.0;
 		npageaccesses = 2.0 * npages * log_runs;
-		/* Assume half are sequential (cost 1), half are not */
+		/* Assume half are sequential, half are not */
 		startup_cost += npageaccesses *
-			(1.0 + cost_nonsequential_access(npages)) * 0.5;
+			(seq_page_cost + cost_nonsequential_access(npages)) * 0.5;
 	}
 
 	/*
@@ -871,8 +876,8 @@ cost_material(Path *path,
 		double		npages = ceil(nbytes / BLCKSZ);
 
 		/* We'll write during startup and read during retrieval */
-		startup_cost += npages;
-		run_cost += npages;
+		startup_cost += seq_page_cost * npages;
+		run_cost += seq_page_cost * npages;
 	}
 
 	/*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5aeadce0917..c75ceef3732 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.205 2006/05/02 11:28:55 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.206 2006/06/05 02:49:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -4555,9 +4555,9 @@ genericcostestimate(PlannerInfo *root,
 	 * Compute the index access cost.
 	 *
 	 * Disk cost: our generic assumption is that the index pages will be read
-	 * sequentially, so they have cost 1.0 each, not random_page_cost.
+	 * sequentially, so they cost seq_page_cost each, not random_page_cost.
 	 */
-	*indexTotalCost = numIndexPages;
+	*indexTotalCost = seq_page_cost * numIndexPages;
 
 	/*
 	 * CPU cost: any complex expressions in the indexquals will need to be
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1ff2e0c8318..9ef561d4c12 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.320 2006/05/21 20:10:42 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.321 2006/06/05 02:49:58 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -1595,57 +1595,63 @@ static struct config_int ConfigureNamesInt[] =
 static struct config_real ConfigureNamesReal[] =
 {
 	{
-		{"effective_cache_size", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's assumption about size of the disk cache."),
-			gettext_noop("That is, the portion of the kernel's disk cache that "
-						 "will be used for PostgreSQL data files. This is measured in disk "
-						 "pages, which are normally 8 kB each.")
+		{"seq_page_cost", PGC_USERSET, QUERY_TUNING_COST,
+			gettext_noop("Sets the planner's estimate of the cost of a "
+						 "sequentially fetched disk page."),
+			NULL
 		},
-		&effective_cache_size,
-		DEFAULT_EFFECTIVE_CACHE_SIZE, 1, DBL_MAX, NULL, NULL
+		&seq_page_cost,
+		DEFAULT_SEQ_PAGE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"random_page_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of the cost of a nonsequentially "
-						 "fetched disk page."),
-			gettext_noop("This is measured as a multiple of the cost of a "
-			  "sequential page fetch. A higher value makes it more likely a "
-						 "sequential scan will be used, a lower value makes it more likely an "
-						 "index scan will be used.")
+			gettext_noop("Sets the planner's estimate of the cost of a "
+						 "nonsequentially fetched disk page."),
+			NULL
 		},
 		&random_page_cost,
 		DEFAULT_RANDOM_PAGE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"cpu_tuple_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of the cost of processing each tuple (row)."),
-			gettext_noop("This is measured as a fraction of the cost of a "
-						 "sequential page fetch.")
+			gettext_noop("Sets the planner's estimate of the cost of "
+						 "processing each tuple (row)."),
+			NULL
 		},
 		&cpu_tuple_cost,
 		DEFAULT_CPU_TUPLE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"cpu_index_tuple_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of processing cost for each "
-						 "index tuple (row) during index scan."),
-			gettext_noop("This is measured as a fraction of the cost of a "
-						 "sequential page fetch.")
+			gettext_noop("Sets the planner's estimate of the cost of "
+						 "processing each index entry during an index scan."),
+			NULL
 		},
 		&cpu_index_tuple_cost,
 		DEFAULT_CPU_INDEX_TUPLE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"cpu_operator_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of processing cost of each operator in WHERE."),
-			gettext_noop("This is measured as a fraction of the cost of a sequential "
-						 "page fetch.")
+			gettext_noop("Sets the planner's estimate of the cost of "
+						 "processing each operator or function call."),
+			NULL
 		},
 		&cpu_operator_cost,
 		DEFAULT_CPU_OPERATOR_COST, 0, DBL_MAX, NULL, NULL
 	},
 
 	{
+		{"effective_cache_size", PGC_USERSET, QUERY_TUNING_COST,
+			gettext_noop("Sets the planner's assumption about size of the disk cache."),
+			gettext_noop("That is, the portion of the kernel's disk cache that "
+						 "will be used for PostgreSQL data files. This is measured in disk "
+						 "pages, which are normally 8 kB each.")
+		},
+		&effective_cache_size,
+		DEFAULT_EFFECTIVE_CACHE_SIZE, 1, DBL_MAX, NULL, NULL
+	},
+
+	{
 		{"geqo_selection_bias", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("GEQO: selective pressure within the population."),
 			NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d7f32256d11..0499223d110 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -175,12 +175,12 @@
 
 # - Planner Cost Constants -
 
+#seq_page_cost = 1.0			# measured on an arbitrary scale
+#random_page_cost = 4.0			# same scale as above
+#cpu_tuple_cost = 0.01			# same scale as above
+#cpu_index_tuple_cost = 0.001		# same scale as above
+#cpu_operator_cost = 0.0025		# same scale as above
 #effective_cache_size = 1000		# typically 8KB each
-#random_page_cost = 4			# units are one sequential page fetch 
-					# cost
-#cpu_tuple_cost = 0.01			# (same)
-#cpu_index_tuple_cost = 0.001		# (same)
-#cpu_operator_cost = 0.0025		# (same)
 
 # - Genetic Query Optimizer -
author	Tom Lane <tgl@sss.pgh.pa.us>	2006-06-05 02:49:58 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2006-06-05 02:49:58 +0000
commit	eed6c9ed7e243948e440ed1388591b7fa28f5827 (patch)
tree	aef8821bdd57ed83e2c1b2ce27014e45beed0934 /src/backend
parent	a837851dc060515c1f7023f74d75156a80c2c936 (diff)
download	postgresql-eed6c9ed7e243948e440ed1388591b7fa28f5827.tar.gz postgresql-eed6c9ed7e243948e440ed1388591b7fa28f5827.zip