8 files changed, 421 insertions, 70 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 1738a5bf1d4..01885a5095b 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1,6 +1,6 @@
 <!--
  Documentation of the system catalogs, directed toward PostgreSQL developers
- $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.15 2001/04/20 15:52:33 thomas Exp $
+ $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.16 2001/05/07 00:43:14 tgl Exp $
  -->
 
 <chapter id="catalogs">
@@ -16,7 +16,7 @@
    <productname>PostgreSQL</productname>'s system catalogs are regular
    tables.  You can drop and recreate the tables, add columns, insert
    and update values, and severely mess up your system that way.
-   Normally one never has to change the system catalogs by hand, there
+   Normally one should not change the system catalogs by hand, there
    are always SQL commands to do that.  (For example, <command>CREATE
    DATABASE</command> inserts a row into the
    <structname>pg_database</structname> catalog -- and actually
@@ -185,7 +185,7 @@
   <para>
    <structname>pg_aggregate</structname> stores information about
    aggregate functions.  An aggregate function is a function that
-   operates on a set of values (typically one column from each the row
+   operates on a set of values (typically one column from each row
    that matches a query condition) and returns a single value computed
    from all these values.  Typical aggregate functions are
    <function>sum</function>, <function>count</function>, and
@@ -233,7 +233,7 @@
       <entry>aggbasetype</entry>
       <entry><type>oid</type></entry>
       <entry>pg_type.oid</entry>
-      <entry>The type on which this function operates when invoked from SQL</entry>
+      <entry>The input datatype for this aggregate function</entry>
      </row>
      <row>
       <entry>aggtranstype</entry>
@@ -269,7 +269,7 @@
 
   <para>
    An aggregate function is identified through name
-   <emphasis>and</emphasis> argument type.  Hence aggname and aggname
+   <emphasis>and</emphasis> argument type.  Hence aggname and aggbasetype
    are the composite primary key.
   </para>
 
@@ -311,11 +311,8 @@
      <row>
       <entry>adnum</entry>
       <entry><type>int2</type></entry>
-      <entry></entry>
-      <entry>
-       The number of the column; see
-       <structname>pg_attribute</structname>.<structfield>pg_attnum</structfield>
-      </entry>
+      <entry>pg_attribute.attnum</entry>
+      <entry>The number of the column</entry>
      </row>
 
      <row>
@@ -390,20 +387,18 @@
      </row>
 
      <row>
-      <entry>attdispersion</entry>
-      <entry><type>float4</type></entry>
+      <entry>attstattarget</entry>
+      <entry><type>int4</type></entry>
       <entry></entry>
       <entry>
-       <structfield>attdispersion</structfield> is the dispersion
-       statistic of the column (0.0 to 1.0), or zero if the statistic
-       has not been calculated, or -1.0 if <command>VACUUM</command>
-       found that the column contains no duplicate entries (in which
-       case the dispersion should be taken as
-       1.0/<symbol>numberOfRows</symbol> for the current table size).
-       The -1.0 hack is useful because the number of rows may be
-       updated more often than
-       <structfield>attdispersion</structfield> is. We assume that the
-       column will retain its no-duplicate-entry property.
+       <structfield>attstattarget</structfield> controls the level of detail
+       of statistics accumulated for this column by
+       <command>ANALYZE</command>.
+       A zero value indicates that no statistics should be collected.
+       The exact meaning of positive values is datatype-dependent.
+       For scalar datatypes, <structfield>attstattarget</structfield>
+       is both the target number of <quote>most common values</quote>
+       to collect, and the target number of histogram bins to create.
       </entry>
      </row>
 
@@ -430,10 +425,12 @@
      </row>
 
      <row>
-      <entry>attnelems</entry>
+      <entry>attndims</entry>
       <entry><type>int4</type></entry>
       <entry></entry>
-      <entry>Number of dimensions, if the column is an array</entry>
+      <entry>
+       Number of dimensions, if the column is an array; otherwise 0.
+      </entry>
      </row>
 
      <row>
@@ -610,18 +607,22 @@
       <entry></entry>
       <entry>
        Size of the on-disk representation of this table in pages (size
-       <symbol>BLCKSZ</symbol>).  This is only an approximate value
-       which is calculated during vacuum.
+       <symbol>BLCKSZ</symbol>).
+       This is only an estimate used by the planner.
+       It is updated by <command>VACUUM</command>,
+       <command>ANALYZE</command>, and <command>CREATE INDEX</command>.
       </entry>
      </row>
 
      <row>
       <entry>reltuples</entry>
-      <entry><type>int4</type></entry>
+      <entry><type>float4</type></entry>
       <entry></entry>
       <entry>
-       Number of tuples in the table.  This is only an estimate used
-       by the planner, updated by <command>VACUUM</command>.
+       Number of tuples in the table.
+       This is only an estimate used by the planner.
+       It is updated by <command>VACUUM</command>,
+       <command>ANALYZE</command>, and <command>CREATE INDEX</command>.
       </entry>
      </row>
 
@@ -1671,6 +1672,130 @@
  </section>
 
 
+ <section id="catalog-pg-statistic">
+  <title>pg_statistic</title>
+
+  <para>
+   <structname>pg_statistic</structname> stores statistical data about
+   the contents of the database.  Entries are created by
+   <command>ANALYZE</command> and subsequently used by the query planner.
+   There is one entry for each table column that has been analyzed.
+   Note that all the statistical data is inherently approximate,
+   even assuming that it is up-to-date.
+  </para>
+
+  <para>
+   Since different kinds of statistics may be appropriate for different
+   kinds of data, <structname>pg_statistic</structname> is designed not
+   to assume very much about what sort of statistics it stores.  Only
+   extremely general statistics (such as NULL-ness) are given dedicated
+   columns in <structname>pg_statistic</structname>.  Everything else
+   is stored in "slots", which are groups of associated columns whose
+   content is identified by a code number in one of the slot's columns.
+   For more information see
+   <filename>src/include/catalog/pg_statistic.h</filename>.
+  </para>
+
+  <table>
+   <title>pg_statistic Columns</title>
+
+   <tgroup cols=4>
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry>starelid</entry>
+      <entry><type>oid</type></entry>
+      <entry>pg_class.oid</entry>
+      <entry>The table that the described column belongs to</entry>
+     </row>
+
+     <row>
+      <entry>staattnum</entry>
+      <entry><type>int2</type></entry>
+      <entry>pg_attribute.attnum</entry>
+      <entry>The number of the described column</entry>
+     </row>
+
+     <row>
+      <entry>stanullfrac</entry>
+      <entry><type>float4</type></entry>
+      <entry></entry>
+      <entry>The fraction of the column's entries that are NULL</entry>
+     </row>
+
+     <row>
+      <entry>stawidth</entry>
+      <entry><type>int4</type></entry>
+      <entry></entry>
+      <entry>The average stored width, in bytes, of non-NULL entries</entry>
+     </row>
+
+     <row>
+      <entry>stadistinct</entry>
+      <entry><type>float4</type></entry>
+      <entry></entry>
+      <entry>The number of distinct non-NULL data values in the column.
+      A value greater than zero is the actual number of distinct values.
+      A value less than zero is the negative of a fraction of the number
+      of rows in the table (for example, a column in which values appear about
+      twice on the average could be represented by stadistinct = -0.5).
+      A zero value means the number of distinct values is unknown.
+      </entry>
+     </row>
+
+     <row>
+      <entry>stakindN</entry>
+      <entry><type>int2</type></entry>
+      <entry></entry>
+      <entry>A code number indicating the kind of statistics stored in the Nth
+      "slot" of the <structname>pg_statistic</structname> row.
+      </entry>
+     </row>
+
+     <row>
+      <entry>staopN</entry>
+      <entry><type>oid</type></entry>
+      <entry>pg_operator.oid</entry>
+      <entry>An operator used to derive the statistics stored in the
+      Nth "slot".  For example, a histogram slot would show the "&lt;"
+      operator that defines the sort order of the data.
+      </entry>
+     </row>
+
+     <row>
+      <entry>stanumbersN</entry>
+      <entry><type>float4[]</type></entry>
+      <entry></entry>
+      <entry>Numerical statistics of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not involve numerical values.
+      </entry>
+     </row>
+
+     <row>
+      <entry>stavaluesN</entry>
+      <entry><type>text[]</type></entry>
+      <entry></entry>
+      <entry>Column data values of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not store any data values.
+      For datatype independence, all column data values are converted
+      to external textual form and stored as TEXT datums.
+      </entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+ </section>
+
+
  <section id="catalog-pg-type">
   <title>pg_type</title>
 
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml
index 32ecd9e6695..42cab244ab8 100644
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -1,4 +1,4 @@
-<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.14 2001/02/20 22:27:56 petere Exp $ -->
+<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.15 2001/05/07 00:43:14 tgl Exp $ -->
 
 <chapter id="indices">
  <title id="indices-title">Indices</title>
@@ -71,7 +71,7 @@ CREATE INDEX test1_id_index ON test1 (id);
    Once the index is created, no further intervention is required: the
    system will use the index when it thinks it would be more efficient
    than a sequential table scan.  But you may have to run the
-   <command>VACUUM ANALYZE</command> command regularly to update
+   <command>ANALYZE</command> command regularly to update
    statistics to allow the query planner to make educated decisions.
    Also read <xref linkend="performance-tips"> for information about
    how to find out whether an index is used and when and why the
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index 0088896131e..dea65e98f2e 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.27 2001/01/13 03:11:12 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.28 2001/05/07 00:43:14 tgl Exp $
 Postgres documentation
 Complete list of usable sgml source files in this directory.
 -->
@@ -40,6 +40,7 @@ Complete list of usable sgml source files in this directory.
 <!entity alterGroup         system "alter_group.sgml">
 <!entity alterTable         system "alter_table.sgml">
 <!entity alterUser          system "alter_user.sgml">
+<!entity analyze            system "analyze.sgml">
 <!entity begin              system "begin.sgml">
 <!entity checkpoint         system "checkpoint.sgml">
 <!entity close              system "close.sgml">
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 4c258c81650..21fc8c2ebdb 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.22 2001/03/05 18:42:55 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.23 2001/05/07 00:43:15 tgl Exp $
 Postgres documentation
 -->
 
@@ -29,7 +29,9 @@ ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
 ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
     ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> { SET DEFAULT <replaceable
     class="PARAMETER">value</replaceable> | DROP DEFAULT }
-ALTER TABLE <replaceable class="PARAMETER">table</replaceable> [ * ]
+ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
+    ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET STATISTICS <replaceable class="PARAMETER">integer</replaceable>
+ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
     RENAME [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> TO <replaceable
     class="PARAMETER">newcolumn</replaceable>
 ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
@@ -159,9 +161,14 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
    <command>ALTER TABLE</command> changes the definition of an existing table.
    The <literal>ADD COLUMN</literal> form adds a new column to the table
    using the same syntax as <xref linkend="SQL-CREATETABLE"
-   endterm="SQL-CREATETABLE-title">. The <literal>ALTER COLUMN</literal> form
-   allows you to set or remove the default for the column. Note that defaults
-   only apply to newly inserted rows.
+   endterm="SQL-CREATETABLE-title">.
+   The <literal>ALTER COLUMN SET/DROP DEFAULT</literal> forms
+   allow you to set or remove the default for the column. Note that defaults
+   only apply to subsequent <command>INSERT</command> commands; they do not
+   cause rows already in the table to change.
+   The <literal>ALTER COLUMN SET STATISTICS</literal> form allows you to
+   set the statistics-gathering target for subsequent
+   <xref linkend="sql-analyze" endterm="sql-analyze-title"> operations.
    The <literal>RENAME</literal> clause causes the name of a table or column
    to change without changing any of the data contained in
    the affected table. Thus, the table or column will
@@ -170,7 +177,7 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
    The ADD <replaceable class="PARAMETER">table constraint definition</replaceable> clause 
    adds a new constraint to the table using the same syntax as <xref
    linkend="SQL-CREATETABLE" endterm="SQL-CREATETABLE-title">. 
-   The OWNER clause chnages the owner of the table to the user <replaceable class="PARAMETER">
+   The OWNER clause changes the owner of the table to the user <replaceable class="PARAMETER">
    new user</replaceable>.
   </para>
 
@@ -190,10 +197,11 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
    </para>
 
    <para>
-    In the current implementation, default and constraint clauses for the
+    In the current implementation of <literal>ADD COLUMN</literal>,
+    default and constraint clauses for the
     new column will be ignored. You can use the <literal>SET DEFAULT</literal>
     form of <command>ALTER TABLE</command> to set the default later.
-    (You will also have to update the already existing rows to the
+    (You may also want to update the already existing rows to the
     new default value, using <xref linkend="sql-update"
     endterm="sql-update-title">.)
    </para>
@@ -210,7 +218,7 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
 
    <para>
     You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
     catalog is not permitted.
     The <citetitle>PostgreSQL User's Guide</citetitle> has further
     information on inheritance.
diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml
new file mode 100644
index 00000000000..57d3213d614
--- /dev/null
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -0,0 +1,219 @@
+<!--
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/analyze.sgml,v 1.1 2001/05/07 00:43:15 tgl Exp $
+Postgres documentation
+-->
+
+<refentry id="SQL-ANALYZE">
+ <refmeta>
+  <refentrytitle id="sql-analyze-title">
+   ANALYZE
+  </refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+ <refnamediv>
+  <refname>
+   ANALYZE
+  </refname>
+  <refpurpose>
+   Collect statistics about a <productname>Postgres</productname> database
+  </refpurpose>
+ </refnamediv>
+ <refsynopsisdiv>
+  <refsynopsisdivinfo>
+   <date>2001-05-04</date>
+  </refsynopsisdivinfo>
+  <synopsis>
+ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ (<replaceable class="PARAMETER">column</replaceable> [, ...] ) ] ]
+  </synopsis>
+
+  <refsect2 id="R2-SQL-ANALYZE-1">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    Inputs
+   </title>
+
+   <para>
+    <variablelist>
+     <varlistentry>
+      <term>VERBOSE</term>
+      <listitem>
+       <para>
+	Enables display of progress messages.
+       </para>
+      </listitem>
+     </varlistentry>
+     <varlistentry>
+      <term><replaceable class="PARAMETER">table</replaceable></term>
+      <listitem>
+       <para>
+	The name of a specific table to analyze. Defaults to all tables.
+       </para>
+      </listitem>
+     </varlistentry>
+     <varlistentry>
+      <term><replaceable class="PARAMETER">column</replaceable></term>
+      <listitem>
+       <para>
+	The name of a specific column to analyze. Defaults to all columns.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+   </para>
+  </refsect2>
+
+  <refsect2 id="R2-SQL-ANALYZE-2">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    Outputs
+   </title>
+   <para>
+
+    <variablelist>
+     <varlistentry>
+      <term><computeroutput>
+<returnvalue>ANALYZE</returnvalue>
+       </computeroutput></term>
+      <listitem>
+       <para>
+	The command is complete.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
+  </refsect2>
+ </refsynopsisdiv>
+
+ <refsect1 id="R1-SQL-ANALYZE-1">
+  <refsect1info>
+   <date>2001-05-04</date>
+  </refsect1info>
+  <title>
+   Description
+  </title>
+  <para>
+   <command>ANALYZE</command> collects statistics about the contents of
+   <productname>Postgres</productname> tables, and stores the results in
+   the system table <literal>pg_statistic</literal>.  Subsequently,
+   the query planner uses the statistics to help determine the most efficient
+   execution plans for queries.
+  </para>
+
+  <para>
+   With no parameter, <command>ANALYZE</command> examines every table in the
+   current database.  With a parameter, <command>ANALYZE</command> examines
+   only that table.  It is further possible to give a list of column names,
+   in which case only the statistics for those columns are updated.
+  </para>
+
+  <refsect2 id="R2-SQL-ANALYZE-3">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    Notes
+   </title>
+
+  <para>
+   It is a good idea to run <command>ANALYZE</command> periodically, or
+   just after making major changes in the contents of a table.  Accurate
+   statistics will help the planner to choose the most appropriate query
+   plan, and thereby improve the speed of query processing.  A common
+   strategy is to run <command>VACUUM</command> and <command>ANALYZE</command>
+   once a day during a low-usage time of day.
+  </para>
+
+  <para>
+   Unlike <xref linkend="sql-vacuum" endterm="sql-vacuum-title">,
+   <command>ANALYZE</command> requires
+   only a read lock on the target table, so it can run in parallel with
+   other activity on the table.
+  </para>
+
+  <para>
+   For large tables, <command>ANALYZE</command> takes a random sample of the
+   table contents, rather than examining every row.  This allows even very
+   large tables to be analyzed in a small amount of time.  Note however
+   that the statistics are only approximate, and will change slightly each
+   time <command>ANALYZE</command> is run, even if the actual table contents
+   did not change.  This may result in small changes in the planner's
+   estimated costs shown by <command>EXPLAIN</command>.
+  </para>
+
+  <para>
+   The collected statistics usually include a list of some of the most common
+   values in each column and a histogram showing the approximate data
+   distribution in each column.  One or both of these may be omitted if
+   <command>ANALYZE</command> deems them uninteresting (for example, in
+   a unique-key column, there are no common values) or if the column
+   datatype does not support the appropriate operators.
+  </para>
+
+  <para>
+   The extent of analysis can be controlled by adjusting the per-column
+   statistics target with <command>ALTER TABLE ALTER COLUMN SET
+   STATISTICS</command> (see
+   <xref linkend="sql-altertable" endterm="sql-altertable-title">).  The
+   target value sets the maximum number of entries in the most-common-value
+   list and the maximum number of bins in the histogram.  The default
+   target value is 10, but this can be adjusted up or down to trade off
+   accuracy of planner estimates against the time taken for
+   <command>ANALYZE</command> and the
+   amount of space occupied in <literal>pg_statistic</literal>.
+   In particular, setting the statistics target to zero disables collection of
+   statistics for that column.  It may be useful to do that for columns that
+   are never used as part of the WHERE, GROUP BY, or ORDER BY clauses of
+   queries, since the planner will have no use for statistics on such columns.
+  </para>
+
+  <para>
+   The largest statistics target among the columns being analyzed determines
+   the number of table rows sampled to prepare the statistics.  Increasing
+   the target causes a proportional increase in the time and space needed
+   to do <command>ANALYZE</command>.
+  </para>
+
+  </refsect2>
+ </refsect1>
+
+ <refsect1 id="R1-SQL-ANALYZE-3">
+  <title>
+   Compatibility
+  </title>
+
+  <refsect2 id="R2-SQL-ANALYZE-4">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    SQL92
+   </title>
+   <para>
+    There is no <command>ANALYZE</command> statement in <acronym>SQL92</acronym>.
+   </para>
+  </refsect2>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index 51cb8a9ffda..cbb182466ea 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.13 2001/01/13 23:58:55 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.14 2001/05/07 00:43:15 tgl Exp $
 Postgres documentation
 -->
 
@@ -15,15 +15,15 @@ Postgres documentation
    VACUUM
   </refname>
   <refpurpose>
-   Clean and analyze a <productname>Postgres</productname> database
+   Clean and optionally analyze a <productname>Postgres</productname> database
   </refpurpose>
  </refnamediv>
  <refsynopsisdiv>
   <refsynopsisdivinfo>
-   <date>1999-07-20</date>
+   <date>2001-05-04</date>
   </refsynopsisdivinfo>
   <synopsis>
-VACUUM [ VERBOSE ] [ ANALYZE ] [ <replaceable class="PARAMETER">table</replaceable> ]
+VACUUM [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> ]
 VACUUM [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">table</replaceable> [ (<replaceable class="PARAMETER">column</replaceable> [, ...] ) ] ]
   </synopsis>
 
@@ -49,7 +49,7 @@ VACUUM [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">table</replaceable>
       <term>ANALYZE</term>
       <listitem>
        <para>
-	Updates column statistics used by the optimizer to
+	Updates statistics used by the optimizer to
 	determine the most efficient way to execute a query.
        </para>
       </listitem>
@@ -90,7 +90,7 @@ VACUUM [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">table</replaceable>
        </computeroutput></term>
       <listitem>
        <para>
-	The command has been accepted and the database is being cleaned.
+	The command is complete.
        </para>
       </listitem>
      </varlistentry>
@@ -144,28 +144,26 @@ NOTICE:  Index <replaceable class="PARAMETER">index</replaceable>: Pages 28;
    Description
   </title>
   <para>
-   <command>VACUUM</command> serves two purposes in 
-   <productname>Postgres</productname> as both a means to reclaim storage and
-   also a means to collect information for the optimizer.
+   <command>VACUUM</command> reclaims storage occupied by deleted tuples.
+   In normal <productname>Postgres</productname> operation, tuples that
+   are DELETEd or obsoleted by UPDATE are not physically removed from
+   their table; they remain present until a <command>VACUUM</command> is
+   done.  Therefore it's necessary to do <command>VACUUM</command>
+   periodically, especially on frequently-updated tables.
   </para>
 
   <para>
-   <command>VACUUM</command> opens every table in the database,
-   cleans out records from rolled back transactions, and updates statistics in the
-   system catalogs.  The statistics maintained include the number of
-   tuples and number of pages stored in all tables.
-  </para>
-
-
-  <para>
-   <command>VACUUM ANALYZE</command> collects statistics representing the
-   dispersion of the data in each column.
-   This information is valuable when several query execution paths are possible.
+   With no parameter, <command>VACUUM</command> processes every table in the
+   current database.  With a parameter, <command>VACUUM</command> processes
+   only that table.
   </para>
 
   <para>
-   Running <command>VACUUM</command>
-   periodically will increase the speed of the database in processing user queries.
+   <command>VACUUM ANALYZE</command> performs a <command>VACUUM</command>
+   and then an <command>ANALYZE</command> for each selected table.  This
+   is a handy combination form for routine maintenance scripts.  See
+   <xref linkend="sql-analyze" endterm="sql-analyze-title">
+   for more details about its processing.
   </para>
 
   <refsect2 id="R2-SQL-VACUUM-3">
@@ -175,16 +173,15 @@ NOTICE:  Index <replaceable class="PARAMETER">index</replaceable>: Pages 28;
    <title>
     Notes
    </title>
-   <para>
-    The open database is the target for <command>VACUUM</command>.
-   </para>
+
    <para>
     We recommend that active production databases be
     <command>VACUUM</command>-ed nightly, in order to remove
     expired rows. After copying a large table into
     <productname>Postgres</productname> or after deleting a large number
     of records, it may be a good idea to issue a <command>VACUUM
-    ANALYZE</command> query. This will update the system catalogs with
+    ANALYZE</command> command for the affected table. This will update the
+    system catalogs with
     the results of all recent changes, and allow the
     <productname>Postgres</productname> query optimizer to make better
     choices in planning user queries.
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index b92ee0868d0..9a977a6515c 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
 <!-- reference.sgml
-$Header: /cvsroot/pgsql/doc/src/sgml/reference.sgml,v 1.15 2001/03/24 13:21:14 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/reference.sgml,v 1.16 2001/05/07 00:43:14 tgl Exp $
 
 PostgreSQL Reference Manual
 -->
@@ -26,6 +26,7 @@ PostgreSQL Reference Manual
    &alterGroup;
    &alterTable;
    &alterUser;
+   &analyze;
    &begin;
    &checkpoint;
    &close;
diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml
index d38e78a4e1a..57d8bb79c28 100644
--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.11 2000/09/29 20:21:34 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.12 2001/05/07 00:43:14 tgl Exp $
 -->
 
  <Chapter Id="xoper">
@@ -244,7 +244,7 @@ SELECT (a + b) AS c FROM test_complex;
     only a small fraction.  '&lt;' will accept a fraction that depends on
     where the given constant falls in the range of values for that table
     column (which, it just so happens, is information collected by
-    VACUUM ANALYZE and made available to the selectivity estimator).
+    <command>ANALYZE</command> and made available to the selectivity estimator).
     '&lt;=' will accept a slightly larger fraction than '&lt;' for the same
     comparison constant, but they're close enough to not be worth
     distinguishing, especially since we're not likely to do better than a