diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2005-03-27 23:53:05 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2005-03-27 23:53:05 +0000 |
commit | bf3dbb5881e9b886ee9fe84bca2153c698eea885 (patch) | |
tree | eaaf385364adebc2489c72f30e533f5fe60748ec /doc/src | |
parent | 351519affcffb636de68c4872521c9ac22faa228 (diff) | |
download | postgresql-bf3dbb5881e9b886ee9fe84bca2153c698eea885.tar.gz postgresql-bf3dbb5881e9b886ee9fe84bca2153c698eea885.zip |
First steps towards index scans with heap access decoupled from index
access: define new index access method functions 'amgetmulti' that can
fetch multiple TIDs per call. (The functions exist but are totally
untested as yet.) Since I was modifying pg_am anyway, remove the
no-longer-needed 'rel' parameter from amcostestimate functions, and
also remove the vestigial amowner column that was creating useless
work for Alvaro's shared-object-dependencies project.
Initdb forced due to changes in pg_am.
Diffstat (limited to 'doc/src')
-rw-r--r-- | doc/src/sgml/catalogs.sgml | 28 | ||||
-rw-r--r-- | doc/src/sgml/indexam.sgml | 85 |
2 files changed, 74 insertions, 39 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 7cfca6f1182..2dc8b300956 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,6 +1,6 @@ <!-- Documentation of the system catalogs, directed toward PostgreSQL developers - $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.96 2005/02/13 03:04:15 tgl Exp $ + $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.97 2005/03/27 23:52:51 tgl Exp $ --> <chapter id="catalogs"> @@ -317,13 +317,6 @@ </row> <row> - <entry><structfield>amowner</structfield></entry> - <entry><type>int4</type></entry> - <entry><literal><link linkend="catalog-pg-shadow"><structname>pg_shadow</structname></link>.usesysid</literal></entry> - <entry>User ID of the owner (currently not used)</entry> - </row> - - <row> <entry><structfield>amstrategies</structfield></entry> <entry><type>int2</type></entry> <entry></entry> @@ -374,24 +367,31 @@ </row> <row> - <entry><structfield>amgettuple</structfield></entry> + <entry><structfield>aminsert</structfield></entry> <entry><type>regproc</type></entry> <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry> - <entry><quote>Next valid tuple</quote> function</entry> + <entry><quote>Insert this tuple</quote> function</entry> </row> <row> - <entry><structfield>aminsert</structfield></entry> + <entry><structfield>ambeginscan</structfield></entry> <entry><type>regproc</type></entry> <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry> - <entry><quote>Insert this tuple</quote> function</entry> + <entry><quote>Start new scan</quote> function</entry> </row> <row> - <entry><structfield>ambeginscan</structfield></entry> + <entry><structfield>amgettuple</structfield></entry> <entry><type>regproc</type></entry> <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry> - <entry><quote>Start new scan</quote> function</entry> + <entry><quote>Next valid tuple</quote> function</entry> + </row> + + <row> + <entry><structfield>amgetmulti</structfield></entry> + <entry><type>regproc</type></entry> + <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry> + <entry><quote>Fetch multiple tuples</quote> function</entry> </row> <row> diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index d6b83060485..ef1e37a8260 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.2 2005/03/21 01:23:55 tgl Exp $ +$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.3 2005/03/27 23:52:51 tgl Exp $ --> <chapter id="indexam"> @@ -252,6 +252,28 @@ amgettuple (IndexScanDesc scan, <para> <programlisting> +boolean +amgetmulti (IndexScanDesc scan, + ItemPointer tids, + int32 max_tids, + int32 *returned_tids); +</programlisting> + Fetch multiple tuples in the given scan. Returns TRUE if the scan should + continue, FALSE if no matching tuples remain. <literal>tids</> points to + a caller-supplied array of <literal>max_tids</> + <structname>ItemPointerData</> records, which the call fills with TIDs of + matching tuples. <literal>*returned_tids</> is set to the number of TIDs + actually returned. This can be less than <literal>max_tids</>, or even + zero, even when the return value is TRUE. (This provision allows the + access method to choose the most efficient stopping points in its scan, + for example index page boundaries.) <function>amgetmulti</> and + <function>amgettuple</> cannot be used in the same index scan; there + are other restrictions too when using <function>amgetmulti</>, as explained + in <xref linkend="index-scanning">. + </para> + + <para> +<programlisting> void amrescan (IndexScanDesc scan, ScanKey key); @@ -297,7 +319,6 @@ amrestrpos (IndexScanDesc scan); <programlisting> void amcostestimate (Query *root, - RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, Cost *indexStartupCost, @@ -407,6 +428,25 @@ amcostestimate (Query *root, true, insertions or deletions from other backends must be handled as well.) </para> + <para> + Instead of using <function>amgettuple</>, an index scan can be done with + <function>amgetmulti</> to fetch multiple tuples per call. This can be + noticeably more efficient than <function>amgettuple</> because it allows + avoiding lock/unlock cycles within the access method. In principle + <function>amgetmulti</> should have the same effects as repeated + <function>amgettuple</> calls, but we impose several restrictions to + simplify matters. In the first place, <function>amgetmulti</> does not + take a <literal>direction</> argument, and therefore it does not support + backwards scan nor intrascan reversal of direction. The access method + need not support marking or restoring scan positions during an + <function>amgetmulti</> scan, either. (These restrictions cost little + since it would be difficult to use these features in an + <function>amgetmulti</> scan anyway: adjusting the caller's buffered + list of TIDs would be complex.) Finally, <function>amgetmulti</> does + not guarantee any locking of the returned tuples, with implications + spelled out in <xref linkend="index-locking">. + </para> + </sect1> <sect1 id="index-locking"> @@ -515,10 +555,15 @@ amcostestimate (Query *root, and only visit the heap tuples sometime later, requires much less index locking overhead and may allow a more efficient heap access pattern. Per the above analysis, we must use the synchronous approach for - non-MVCC-compliant snapshots, but an asynchronous scan would be safe - for a query using an MVCC snapshot. This possibility is not exploited - as of <productname>PostgreSQL</productname> 8.0, but it is likely to be - investigated soon. + non-MVCC-compliant snapshots, but an asynchronous scan is workable + for a query using an MVCC snapshot. + </para> + + <para> + In an <function>amgetmulti</> index scan, the access method need not + guarantee to keep an index pin on any of the returned tuples. (It would be + impractical to pin more than the last one anyway.) Therefore + it is only safe to use such scans with MVCC-compliant snapshots. </para> </sect1> @@ -611,7 +656,6 @@ amcostestimate (Query *root, <programlisting> void amcostestimate (Query *root, - RelOptInfo *rel, IndexOptInfo *index, List *indexQuals, Cost *indexStartupCost, @@ -633,19 +677,10 @@ amcostestimate (Query *root, </varlistentry> <varlistentry> - <term>rel</term> - <listitem> - <para> - The relation the index is on. - </para> - </listitem> - </varlistentry> - - <varlistentry> <term>index</term> <listitem> <para> - The index itself. + The index being considered. </para> </listitem> </varlistentry> @@ -714,19 +749,19 @@ amcostestimate (Query *root, <para> The index access costs should be computed in the units used by - <filename>src/backend/optimizer/path/costsize.c</filename>: a sequential disk block fetch - has cost 1.0, a nonsequential fetch has cost random_page_cost, and - the cost of processing one index row should usually be taken as - cpu_index_tuple_cost (which is a user-adjustable optimizer parameter). - In addition, an appropriate multiple of cpu_operator_cost should be charged + <filename>src/backend/optimizer/path/costsize.c</filename>: a sequential + disk block fetch has cost 1.0, a nonsequential fetch has cost + <varname>random_page_cost</>, and the cost of processing one index row + should usually be taken as <varname>cpu_index_tuple_cost</>. In addition, + an appropriate multiple of <varname>cpu_operator_cost</> should be charged for any comparison operators invoked during index processing (especially evaluation of the indexQuals themselves). </para> <para> The access costs should include all disk and CPU costs associated with - scanning the index itself, but NOT the costs of retrieving or processing - the parent-table rows that are identified by the index. + scanning the index itself, but <emphasis>not</> the costs of retrieving or + processing the parent-table rows that are identified by the index. </para> <para> @@ -764,7 +799,7 @@ amcostestimate (Query *root, <programlisting> *indexSelectivity = clauselist_selectivity(root, indexQuals, - rel->relid, JOIN_INNER); + index->rel->relid, JOIN_INNER); </programlisting> </para> </step> |