aboutsummaryrefslogtreecommitdiff
path: root/doc/src/sgml/intarray.sgml
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src/sgml/intarray.sgml')
-rw-r--r--doc/src/sgml/intarray.sgml286
1 files changed, 286 insertions, 0 deletions
diff --git a/doc/src/sgml/intarray.sgml b/doc/src/sgml/intarray.sgml
new file mode 100644
index 00000000000..7e538a894d5
--- /dev/null
+++ b/doc/src/sgml/intarray.sgml
@@ -0,0 +1,286 @@
+<sect1 id="intarray">
+ <title>intarray</title>
+
+ <indexterm zone="intarray">
+ <primary>intarray</primary>
+ </indexterm>
+
+ <para>
+ This is an implementation of RD-tree data structure using GiST interface
+ of PostgreSQL. It has built-in lossy compression.
+ </para>
+
+ <para>
+ Current implementation provides index support for one-dimensional array of
+ int4's - gist__int_ops, suitable for small and medium size of arrays (used on
+ default), and gist__intbig_ops for indexing large arrays (we use superimposed
+ signature with length of 4096 bits to represent sets).
+ </para>
+
+ <sect2>
+ <title>Functions</title>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ <literal>int icount(int[])</literal> - the number of elements in intarray
+ </para>
+ <programlisting>
+test=# select icount('{1,2,3}'::int[]);
+ icount
+--------
+ 3
+(1 row)
+ </programlisting>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>int[] sort(int[], 'asc' | 'desc')</literal> - sort intarray
+ </para>
+ <programlisting>
+test=# select sort('{1,2,3}'::int[],'desc');
+ sort
+---------
+ {3,2,1}
+(1 row)
+ </programlisting>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>int[] sort(int[])</literal> - sort in ascending order
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>int[] sort_asc(int[]),sort_desc(int[])</literal> - shortcuts for sort
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>int[] uniq(int[])</literal> - returns unique elements
+ </para>
+ <programlisting>
+test=# select uniq(sort('{1,2,3,2,1}'::int[]));
+ uniq
+---------
+ {1,2,3}
+(1 row)
+ </programlisting>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>int idx(int[], int item)</literal> - returns index of first
+ intarray matching element to item, or '0' if matching failed.
+ </para>
+ <programlisting>
+test=# select idx('{1,2,3,2,1}'::int[],2);
+ idx
+-----
+ 2
+(1 row)
+ </programlisting>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>int[] subarray(int[],int START [, int LEN])</literal> - returns
+ part of intarray starting from element number START (from 1) and length LEN.
+ </para>
+ <programlisting>
+test=# select subarray('{1,2,3,2,1}'::int[],2,3);
+ subarray
+----------
+ {2,3,2}
+(1 row)
+ </programlisting>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>int[] intset(int4)</literal> - casting int4 to int[]
+ </para>
+ <programlisting>
+test=# select intset(1);
+ intset
+--------
+ {1}
+(1 row)
+ </programlisting>
+ </listitem>
+
+ </itemizedlist>
+ </sect2>
+
+ <sect2>
+ <title>Operations</title>
+ <table>
+ <title>Operations</title>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>Operator</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry><literal>int[] && int[]</literal></entry>
+ <entry>overlap - returns TRUE if arrays have at least one common element</entry>
+ </row>
+ <row>
+ <entry><literal>int[] @> int[]</literal></entry>
+ <entry>contains - returns TRUE if left array contains right array</entry>
+ </row>
+ <row>
+ <entry><literal>int[] <@ int[]</literal></entry>
+ <entry>contained - returns TRUE if left array is contained in right array</entry>
+ </row>
+ <row>
+ <entry><literal># int[]</literal></entry>
+ <entry>returns the number of elements in array</entry>
+ </row>
+ <row>
+ <entry><literal>int[] + int</literal></entry>
+ <entry>push element to array ( add to end of array)</entry>
+ </row>
+ <row>
+ <entry><literal>int[] + int[] </literal></entry>
+ <entry>merge of arrays (right array added to the end of left one)</entry>
+ </row>
+ <row>
+ <entry><literal>int[] - int</literal></entry>
+ <entry>remove entries matched by right argument from array</entry>
+ </row>
+ <row>
+ <entry><literal>int[] - int[]</literal></entry>
+ <entry>remove right array from left</entry>
+ </row>
+ <row>
+ <entry><literal>int[] | int</literal></entry>
+ <entry>returns intarray - union of arguments</entry>
+ </row>
+ <row>
+ <entry><literal>int[] | int[]</literal></entry>
+ <entry>returns intarray as a union of two arrays</entry>
+ </row>
+
+ <row>
+ <entry><literal>int[] & int[]</literal></entry>
+ <entry>returns intersection of arrays</entry>
+ </row>
+
+ <row>
+ <entry><literal>int[] @@ query_int</literal></entry>
+ <entry>
+ returns TRUE if array satisfies query (like
+ <literal>'1&amp;(2|3)'</literal>)
+ </entry>
+ </row>
+
+ <row>
+ <entry><literal>query_int ~~ int[]</literal></entry>
+ <entry>returns TRUE if array satisfies query (commutator of @@)</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>
+ (Before PostgreSQL 8.2, the containment operators @> and <@ were
+ respectively called @ and ~. These names are still available, but are
+ deprecated and will eventually be retired. Notice that the old names
+ are reversed from the convention formerly followed by the core geometric
+ datatypes!)
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Example</title>
+
+ <programlisting>
+CREATE TABLE message (mid INT NOT NULL,sections INT[]);
+CREATE TABLE message_section_map (mid INT NOT NULL,sid INT NOT NULL);
+
+-- create indices
+CREATE unique index message_key ON message ( mid );
+CREATE unique index message_section_map_key2 ON message_section_map (sid, mid );
+CREATE INDEX message_rdtree_idx ON message USING GIST ( sections gist__int_ops);
+
+-- select some messages with section in 1 OR 2 - OVERLAP operator
+SELECT message.mid FROM message WHERE message.sections && '{1,2}';
+
+-- select messages contains in sections 1 AND 2 - CONTAINS operator
+SELECT message.mid FROM message WHERE message.sections @> '{1,2}';
+-- the same, CONTAINED operator
+SELECT message.mid FROM message WHERE '{1,2}' <@ message.sections;
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>Benchmark</title>
+ <para>
+ subdirectory bench contains benchmark suite.
+ </para>
+ <programlisting>
+ cd ./bench
+ 1. createdb TEST
+ 2. psql TEST < ../_int.sql
+ 3. ./create_test.pl | psql TEST
+ 4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
+ with/without RD-Tree. Run script without arguments to
+ see availbale options.
+
+ a)test without RD-Tree (OR)
+ ./bench.pl -d TEST -c -s 1,2 -v
+ b)test with RD-Tree
+ ./bench.pl -d TEST -c -s 1,2 -v -r
+
+ BENCHMARKS:
+
+ Size of table &lt;message>: 200000
+ Size of table &lt;message_section_map>: 269133
+
+ Distribution of messages by sections:
+
+ section 0: 74377 messages
+ section 1: 16284 messages
+ section 50: 1229 messages
+ section 99: 683 messages
+
+ old - without RD-Tree support,
+ new - with RD-Tree
+
+ +----------+---------------+----------------+
+ |Search set|OR, time in sec|AND, time in sec|
+ | +-------+-------+--------+-------+
+ | | old | new | old | new |
+ +----------+-------+-------+--------+-------+
+ | 1| 0.625| 0.101| -| -|
+ +----------+-------+-------+--------+-------+
+ | 99| 0.018| 0.017| -| -|
+ +----------+-------+-------+--------+-------+
+ | 1,2| 0.766| 0.133| 0.628| 0.045|
+ +----------+-------+-------+--------+-------+
+ | 1,2,50,65| 0.794| 0.141| 0.030| 0.006|
+ +----------+-------+-------+--------+-------+
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>Authors</title>
+ <para>
+ All work was done by Teodor Sigaev (<email>teodor@stack.net</email>) and Oleg
+ Bartunov (<email>oleg@sai.msu.su</email>). See
+ <ulink url="http://www.sai.msu.su/~megera/postgres/gist"></ulink> for
+ additional information. Andrey Oktyabrski did a great work on adding new
+ functions and operations.
+ </para>
+ </sect2>
+
+</sect1>
+