diff options
Diffstat (limited to 'doc/src/sgml/intarray.sgml')
-rw-r--r-- | doc/src/sgml/intarray.sgml | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/doc/src/sgml/intarray.sgml b/doc/src/sgml/intarray.sgml new file mode 100644 index 00000000000..7e538a894d5 --- /dev/null +++ b/doc/src/sgml/intarray.sgml @@ -0,0 +1,286 @@ +<sect1 id="intarray"> + <title>intarray</title> + + <indexterm zone="intarray"> + <primary>intarray</primary> + </indexterm> + + <para> + This is an implementation of RD-tree data structure using GiST interface + of PostgreSQL. It has built-in lossy compression. + </para> + + <para> + Current implementation provides index support for one-dimensional array of + int4's - gist__int_ops, suitable for small and medium size of arrays (used on + default), and gist__intbig_ops for indexing large arrays (we use superimposed + signature with length of 4096 bits to represent sets). + </para> + + <sect2> + <title>Functions</title> + + <itemizedlist> + + <listitem> + <para> + <literal>int icount(int[])</literal> - the number of elements in intarray + </para> + <programlisting> +test=# select icount('{1,2,3}'::int[]); + icount +-------- + 3 +(1 row) + </programlisting> + </listitem> + + <listitem> + <para> + <literal>int[] sort(int[], 'asc' | 'desc')</literal> - sort intarray + </para> + <programlisting> +test=# select sort('{1,2,3}'::int[],'desc'); + sort +--------- + {3,2,1} +(1 row) + </programlisting> + </listitem> + + <listitem> + <para> + <literal>int[] sort(int[])</literal> - sort in ascending order + </para> + </listitem> + + <listitem> + <para> + <literal>int[] sort_asc(int[]),sort_desc(int[])</literal> - shortcuts for sort + </para> + </listitem> + + <listitem> + <para> + <literal>int[] uniq(int[])</literal> - returns unique elements + </para> + <programlisting> +test=# select uniq(sort('{1,2,3,2,1}'::int[])); + uniq +--------- + {1,2,3} +(1 row) + </programlisting> + </listitem> + + <listitem> + <para> + <literal>int idx(int[], int item)</literal> - returns index of first + intarray matching element to item, or '0' if matching failed. + </para> + <programlisting> +test=# select idx('{1,2,3,2,1}'::int[],2); + idx +----- + 2 +(1 row) + </programlisting> + </listitem> + + <listitem> + <para> + <literal>int[] subarray(int[],int START [, int LEN])</literal> - returns + part of intarray starting from element number START (from 1) and length LEN. + </para> + <programlisting> +test=# select subarray('{1,2,3,2,1}'::int[],2,3); + subarray +---------- + {2,3,2} +(1 row) + </programlisting> + </listitem> + + <listitem> + <para> + <literal>int[] intset(int4)</literal> - casting int4 to int[] + </para> + <programlisting> +test=# select intset(1); + intset +-------- + {1} +(1 row) + </programlisting> + </listitem> + + </itemizedlist> + </sect2> + + <sect2> + <title>Operations</title> + <table> + <title>Operations</title> + <tgroup cols="2"> + <thead> + <row> + <entry>Operator</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + <row> + <entry><literal>int[] && int[]</literal></entry> + <entry>overlap - returns TRUE if arrays have at least one common element</entry> + </row> + <row> + <entry><literal>int[] @> int[]</literal></entry> + <entry>contains - returns TRUE if left array contains right array</entry> + </row> + <row> + <entry><literal>int[] <@ int[]</literal></entry> + <entry>contained - returns TRUE if left array is contained in right array</entry> + </row> + <row> + <entry><literal># int[]</literal></entry> + <entry>returns the number of elements in array</entry> + </row> + <row> + <entry><literal>int[] + int</literal></entry> + <entry>push element to array ( add to end of array)</entry> + </row> + <row> + <entry><literal>int[] + int[] </literal></entry> + <entry>merge of arrays (right array added to the end of left one)</entry> + </row> + <row> + <entry><literal>int[] - int</literal></entry> + <entry>remove entries matched by right argument from array</entry> + </row> + <row> + <entry><literal>int[] - int[]</literal></entry> + <entry>remove right array from left</entry> + </row> + <row> + <entry><literal>int[] | int</literal></entry> + <entry>returns intarray - union of arguments</entry> + </row> + <row> + <entry><literal>int[] | int[]</literal></entry> + <entry>returns intarray as a union of two arrays</entry> + </row> + + <row> + <entry><literal>int[] & int[]</literal></entry> + <entry>returns intersection of arrays</entry> + </row> + + <row> + <entry><literal>int[] @@ query_int</literal></entry> + <entry> + returns TRUE if array satisfies query (like + <literal>'1&(2|3)'</literal>) + </entry> + </row> + + <row> + <entry><literal>query_int ~~ int[]</literal></entry> + <entry>returns TRUE if array satisfies query (commutator of @@)</entry> + </row> + </tbody> + </tgroup> + </table> + <para> + (Before PostgreSQL 8.2, the containment operators @> and <@ were + respectively called @ and ~. These names are still available, but are + deprecated and will eventually be retired. Notice that the old names + are reversed from the convention formerly followed by the core geometric + datatypes!) + </para> + </sect2> + + <sect2> + <title>Example</title> + + <programlisting> +CREATE TABLE message (mid INT NOT NULL,sections INT[]); +CREATE TABLE message_section_map (mid INT NOT NULL,sid INT NOT NULL); + +-- create indices +CREATE unique index message_key ON message ( mid ); +CREATE unique index message_section_map_key2 ON message_section_map (sid, mid ); +CREATE INDEX message_rdtree_idx ON message USING GIST ( sections gist__int_ops); + +-- select some messages with section in 1 OR 2 - OVERLAP operator +SELECT message.mid FROM message WHERE message.sections && '{1,2}'; + +-- select messages contains in sections 1 AND 2 - CONTAINS operator +SELECT message.mid FROM message WHERE message.sections @> '{1,2}'; +-- the same, CONTAINED operator +SELECT message.mid FROM message WHERE '{1,2}' <@ message.sections; + </programlisting> + </sect2> + + <sect2> + <title>Benchmark</title> + <para> + subdirectory bench contains benchmark suite. + </para> + <programlisting> + cd ./bench + 1. createdb TEST + 2. psql TEST < ../_int.sql + 3. ./create_test.pl | psql TEST + 4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries + with/without RD-Tree. Run script without arguments to + see availbale options. + + a)test without RD-Tree (OR) + ./bench.pl -d TEST -c -s 1,2 -v + b)test with RD-Tree + ./bench.pl -d TEST -c -s 1,2 -v -r + + BENCHMARKS: + + Size of table <message>: 200000 + Size of table <message_section_map>: 269133 + + Distribution of messages by sections: + + section 0: 74377 messages + section 1: 16284 messages + section 50: 1229 messages + section 99: 683 messages + + old - without RD-Tree support, + new - with RD-Tree + + +----------+---------------+----------------+ + |Search set|OR, time in sec|AND, time in sec| + | +-------+-------+--------+-------+ + | | old | new | old | new | + +----------+-------+-------+--------+-------+ + | 1| 0.625| 0.101| -| -| + +----------+-------+-------+--------+-------+ + | 99| 0.018| 0.017| -| -| + +----------+-------+-------+--------+-------+ + | 1,2| 0.766| 0.133| 0.628| 0.045| + +----------+-------+-------+--------+-------+ + | 1,2,50,65| 0.794| 0.141| 0.030| 0.006| + +----------+-------+-------+--------+-------+ + </programlisting> + </sect2> + + <sect2> + <title>Authors</title> + <para> + All work was done by Teodor Sigaev (<email>teodor@stack.net</email>) and Oleg + Bartunov (<email>oleg@sai.msu.su</email>). See + <ulink url="http://www.sai.msu.su/~megera/postgres/gist"></ulink> for + additional information. Andrey Oktyabrski did a great work on adding new + functions and operations. + </para> + </sect2> + +</sect1> + |