aboutsummaryrefslogtreecommitdiff
path: root/doc/src/sgml/ref/create_statistics.sgml
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src/sgml/ref/create_statistics.sgml')
-rw-r--r--doc/src/sgml/ref/create_statistics.sgml43
1 files changed, 26 insertions, 17 deletions
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml
index 92ee4e4efa7..854746de24d 100644
--- a/doc/src/sgml/ref/create_statistics.sgml
+++ b/doc/src/sgml/ref/create_statistics.sgml
@@ -79,11 +79,13 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
<term><replaceable class="PARAMETER">statistic_type</replaceable></term>
<listitem>
<para>
- A statistic type to be computed in this statistics object. Currently
- supported types are <literal>ndistinct</literal>, which enables
- n-distinct coefficient tracking,
- and <literal>dependencies</literal>, which enables functional
- dependencies.
+ A statistic type to be computed in this statistics object.
+ Currently supported types are
+ <literal>ndistinct</literal>, which enables n-distinct statistics, and
+ <literal>dependencies</literal>, which enables functional
+ dependency statistics.
+ For more information, see <xref linkend="planner-stats-extended">
+ and <xref linkend="multivariate-statistics-examples">.
</para>
</listitem>
</varlistentry>
@@ -92,7 +94,8 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
<term><replaceable class="PARAMETER">column_name</replaceable></term>
<listitem>
<para>
- The name of a table column to be included in the statistics object.
+ The name of a table column to be covered by the computed statistics.
+ At least two column names must be given.
</para>
</listitem>
</varlistentry>
@@ -114,7 +117,9 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
<title>Notes</title>
<para>
- You must be the owner of a table to create or change statistics on it.
+ You must be the owner of a table to create a statistics object
+ reading it. Once created, however, the ownership of the statistics
+ object is independent of the underlying table(s).
</para>
</refsect1>
@@ -124,8 +129,8 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
<para>
Create table <structname>t1</> with two functionally dependent columns, i.e.
knowledge of a value in the first column is sufficient for determining the
- value in the other column. Then functional dependencies are built on those
- columns:
+ value in the other column. Then functional dependency statistics are built
+ on those columns:
<programlisting>
CREATE TABLE t1 (
@@ -136,21 +141,25 @@ CREATE TABLE t1 (
INSERT INTO t1 SELECT i/100, i/500
FROM generate_series(1,1000000) s(i);
+ANALYZE t1;
+
+-- the number of matching rows will be drastically underestimated:
+EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
+
CREATE STATISTICS s1 (dependencies) ON a, b FROM t1;
ANALYZE t1;
--- valid combination of values
+-- now the rowcount estimate is more accurate:
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
-
--- invalid combination of values
-EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1);
</programlisting>
- Without functional-dependency statistics, the planner would make the
- same estimate of the number of matching rows for these two queries.
- With such statistics, it is able to tell that one case has matches
- and the other does not.
+ Without functional-dependency statistics, the planner would assume
+ that the two <literal>WHERE</> conditions are independent, and would
+ multiply their selectivities together to arrive at a much-too-small
+ rowcount estimate.
+ With such statistics, the planner recognizes that the <literal>WHERE</>
+ conditions are redundant and does not underestimate the rowcount.
</para>
</refsect1>