12 files changed, 808 insertions, 3 deletions
diff --git a/contrib/Makefile b/contrib/Makefile
index 85cabd8618a..8543b5287fe 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.88 2009/08/07 20:50:21 petere Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.89 2009/08/18 10:34:39 teodor Exp $
 
 subdir = contrib
 top_builddir = ..
@@ -39,6 +39,7 @@ SUBDIRS = \
 		tablefunc	\
 		test_parser	\
 		tsearch2	\
+		unaccent	\
 		vacuumlo
 
 ifeq ($(with_openssl),yes)
diff --git a/contrib/README b/contrib/README
index 1ae49adc704..a8396a5bfad 100644
--- a/contrib/README
+++ b/contrib/README
@@ -169,6 +169,10 @@ tsearch2 -
 	Pavel Stehule <pavel.stehule@gmail.com>, based on code originally by
 	Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov <oleg@sai.msu.su>.
 
+unaccent -
+	Unaccent dictionary for text search
+	Teodor Sigaev <teodor@sigaev.ru> and Oleg Bartunov <oleg@sai.msu.su>.
+
 uuid-ossp -
 	UUID generation functions
 	by Peter Eisentraut <peter_e@gmx.net>
diff --git a/contrib/unaccent/Makefile b/contrib/unaccent/Makefile
new file mode 100644
index 00000000000..91b04fc2753
--- /dev/null
+++ b/contrib/unaccent/Makefile
@@ -0,0 +1,24 @@
+# $PostgreSQL: pgsql/contrib/unaccent/Makefile,v 1.1 2009/08/18 10:34:39 teodor Exp $
+
+MODULE_big = unaccent
+OBJS = unaccent.o
+
+DATA_built = unaccent.sql
+DATA = uninstall_unaccent.sql
+DATA_TSEARCH = unaccent.rules
+REGRESS = unaccent
+
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/unaccent
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+# Override REGRESS_OPTS: the regression test needs a UTF8 database.
+REGRESS_OPTS = --dbname=$(CONTRIB_TESTDB) --multibyte=UTF8 --no-locale
diff --git a/contrib/unaccent/expected/unaccent.out b/contrib/unaccent/expected/unaccent.out
new file mode 100644
index 00000000000..8d197c50be7
--- /dev/null
+++ b/contrib/unaccent/expected/unaccent.out
@@ -0,0 +1,58 @@
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+SET client_encoding TO 'KOI8';
+SELECT unaccent('foobar');
+ unaccent 
+----------
+ foobar
+(1 row)
+
+SELECT unaccent('ёлка');
+ unaccent 
+----------
+ елка
+(1 row)
+
+SELECT unaccent('ЁЖИК');
+ unaccent 
+----------
+ ЕЖИК
+(1 row)
+
+SELECT unaccent('unaccent', 'foobar');
+ unaccent 
+----------
+ foobar
+(1 row)
+
+SELECT unaccent('unaccent', 'ёлка');
+ unaccent 
+----------
+ елка
+(1 row)
+
+SELECT unaccent('unaccent', 'ЁЖИК');
+ unaccent 
+----------
+ ЕЖИК
+(1 row)
+
+SELECT ts_lexize('unaccent', 'foobar');
+ ts_lexize 
+-----------
+ 
+(1 row)
+
+SELECT ts_lexize('unaccent', 'ёлка');
+ ts_lexize 
+-----------
+ {елка}
+(1 row)
+
+SELECT ts_lexize('unaccent', 'ЁЖИК');
+ ts_lexize 
+-----------
+ {ЕЖИК}
+(1 row)
+
diff --git a/contrib/unaccent/sql/unaccent.sql b/contrib/unaccent/sql/unaccent.sql
new file mode 100644
index 00000000000..71ab5bb4358
--- /dev/null
+++ b/contrib/unaccent/sql/unaccent.sql
@@ -0,0 +1,19 @@
+SET client_min_messages = warning;
+\set ECHO none
+\i unaccent.sql
+\set ECHO all
+RESET client_min_messages;
+
+SET client_encoding TO 'KOI8';
+
+SELECT unaccent('foobar');
+SELECT unaccent('ёлка');
+SELECT unaccent('ЁЖИК');
+
+SELECT unaccent('unaccent', 'foobar');
+SELECT unaccent('unaccent', 'ёлка');
+SELECT unaccent('unaccent', 'ЁЖИК');
+
+SELECT ts_lexize('unaccent', 'foobar');
+SELECT ts_lexize('unaccent', 'ёлка');
+SELECT ts_lexize('unaccent', 'ЁЖИК');
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
new file mode 100644
index 00000000000..7b5086b9587
--- /dev/null
+++ b/contrib/unaccent/unaccent.c
@@ -0,0 +1,318 @@
+/*-------------------------------------------------------------------------
+ *
+ * unaccent.c
+ *    Text search unaccent dictionary
+ *
+ * Copyright (c) 2009, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *    $PostgreSQL: pgsql/contrib/unaccent/unaccent.c,v 1.1 2009/08/18 10:34:39 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "catalog/namespace.h"
+#include "commands/defrem.h"
+#include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+#include "utils/builtins.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * The unaccent dictionary finds the character to replace by walking an
+ * uncompressed byte-wise tree (called a suffix tree here), one level per
+ * byte of the source character.  Each node of the tree is an array of
+ * 256 SuffixChar structs; the n-th element of the array corresponds to
+ * byte value n.
+ */
+typedef struct SuffixChar {
+	struct SuffixChar	*nextChar;	/* node for the following byte, or NULL */
+	char				*replaceTo;	/* replacement bytes (no trailing NUL), or NULL */
+	int					replacelen;	/* number of bytes in replaceTo */
+} SuffixChar;
+
+/*
+ * placeChar - insert one rule into the tree, consuming one byte of str
+ * per tree level.
+ *
+ * node is the 256-entry array for the current level; if it is NULL a
+ * zeroed array is allocated here.  str/lenstr are the bytes of the source
+ * character that remain to be placed.  replaceTo/replacelen is copied
+ * into the entry reached by the last byte; if that entry already holds a
+ * replacement, a WARNING is emitted and the existing one is kept.
+ *
+ * Returns the node for this level (possibly newly allocated) so that the
+ * caller can link it into its parent.
+ */
+static SuffixChar*
+placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
+{
+	SuffixChar	*slot;
+
+	/* create this level on demand, with every entry zeroed */
+	if ( node == NULL )
+		node = palloc0(sizeof(SuffixChar) * 256);
+
+	slot = &node[*str];
+
+	if ( lenstr != 1 )
+	{
+		/* more bytes follow: descend into (and create) the next level */
+		slot->nextChar = placeChar( slot->nextChar, str+1, lenstr-1, replaceTo, replacelen);
+		return node;
+	}
+
+	/* last byte of the source character: the replacement belongs here */
+	if ( slot->replaceTo != NULL )
+	{
+		elog(WARNING, "duplicate TO argument, use first one");
+		return node;
+	}
+
+	slot->replacelen = replacelen;
+	slot->replaceTo = palloc( replacelen );
+	memcpy(slot->replaceTo, replaceTo, replacelen);
+
+	return node;
+}
+
+/*
+ * initSuffixTree - build the replacement tree from a rules file.
+ *
+ * filename is the base name of the rules file; it is resolved with
+ * get_tsearch_config_filename() using the extension "rules".  The file is
+ * UTF8-encoded and is converted to the current encoding while it is read.
+ * Each line must hold two whitespace-separated tokens, "source target";
+ * lines that do not parse that way are ignored.
+ *
+ * Raises ERROR if the file cannot be opened.  Returns the root of the
+ * tree, or NULL if the file contained no usable rule.
+ */
+static SuffixChar*
+initSuffixTree(char *filename)
+{
+	/*
+	 * These two are modified inside PG_TRY and read after a possible
+	 * longjmp into PG_CATCH, so they must be volatile.
+	 */
+	SuffixChar * volatile rootSuffixTree = NULL;
+	volatile bool	skip;
+	MemoryContext ccxt = CurrentMemoryContext;
+	tsearch_readline_state	trst;
+
+	filename = get_tsearch_config_filename(filename, "rules");
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open unaccent file \"%s\": %m",
+						filename)));
+
+	do
+	{
+		/*
+		 * Assume the pass will be interrupted; only reaching end of file
+		 * without an error clears the flag.  Setting it per line would
+		 * either drop every rule after the first untranslatable line or
+		 * spin forever on a file without any usable line.
+		 */
+		skip = true;
+
+		PG_TRY();
+		{
+			char	src[4096];
+			char	trg[4096];
+			char   *line;
+
+			/*
+			 * pg_do_encoding_conversion() (called by tsearch_readline())
+			 * raises an error if it finds characters that cannot be
+			 * translated into the current encoding.  We just skip such
+			 * lines and carry on with the next one.
+			 */
+			while ((line = tsearch_readline(&trst)) != NULL)
+			{
+				/* field widths keep an overlong token inside src/trg */
+				if ( sscanf(line, "%4095s\t%4095s\n", src, trg) == 2 )
+					rootSuffixTree = placeChar(rootSuffixTree,
+												(unsigned char*)src, (int) strlen(src),
+												trg, (int) strlen(trg));
+
+				/* release the line whether or not it was a valid rule */
+				pfree(line);
+			}
+
+			/* end of file reached cleanly: no further pass is needed */
+			skip = false;
+		}
+		PG_CATCH();
+		{
+			ErrorData  *errdata;
+			MemoryContext ecxt;
+
+			ecxt = MemoryContextSwitchTo(ccxt);
+			errdata = CopyErrorData();
+			if (errdata->sqlerrcode == ERRCODE_UNTRANSLATABLE_CHARACTER)
+			{
+				/* swallow the error; the loop resumes after the bad line */
+				FlushErrorState();
+				FreeErrorData(errdata);
+			}
+			else
+			{
+				MemoryContextSwitchTo(ecxt);
+				PG_RE_THROW();
+			}
+		}
+		PG_END_TRY();
+	}
+	while(skip);
+
+	tsearch_readline_end(&trst);
+
+	return rootSuffixTree;
+}
+
+/*
+ * findReplaceTo - look up one multibyte character in the tree.
+ *
+ * Walks one tree level per byte of src.  Returns the entry reached by the
+ * last of the srclen bytes (its replaceTo may still be NULL), or NULL if
+ * the tree ends before all bytes have been consumed.
+ */
+static SuffixChar * 
+findReplaceTo( SuffixChar *node, unsigned char *src, int srclen )
+{
+	SuffixChar *level;
+
+	for ( level = node; level != NULL; src++, srclen-- )
+	{
+		SuffixChar *entry = &level[*src];
+
+		/* last byte of the character: this entry is the answer */
+		if ( srclen == 1 )
+			return entry;
+
+		level = entry->nextChar;
+	}
+
+	return NULL;
+}
+
+/*
+ * unaccent_init - INIT method of the unaccent dictionary template.
+ *
+ * Argument 0 is the list of dictionary options (DefElem nodes).  The only
+ * recognized option is "Rules" (matched case-insensitively), whose value
+ * is passed to initSuffixTree().  Raises ERROR if Rules is given more
+ * than once, if any other option is present, or if Rules is missing.
+ * Returns the root of the tree built from the rules file.
+ */
+PG_FUNCTION_INFO_V1(unaccent_init);
+Datum       unaccent_init(PG_FUNCTION_ARGS);
+Datum
+unaccent_init(PG_FUNCTION_ARGS)
+{
+	List       *dictoptions = (List *) PG_GETARG_POINTER(0);
+	SuffixChar *rootSuffixTree = NULL;
+	bool        fileloaded = false;
+	ListCell   *l;
+
+	foreach(l, dictoptions)
+	{
+		DefElem    *defel = (DefElem *) lfirst(l);
+
+		if (pg_strcasecmp("Rules", defel->defname) == 0)
+		{
+			if (fileloaded)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple Rules parameters")));
+
+			rootSuffixTree = initSuffixTree(defGetString(defel));
+			fileloaded = true;
+		}
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized Unaccent parameter: \"%s\"",
+							defel->defname)));
+		}
+	}
+
+	if (!fileloaded)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing Rules parameter")));
+	}
+
+	PG_RETURN_POINTER(rootSuffixTree);
+}
+
+/*
+ * unaccent_lexize - LEXIZE method of the unaccent dictionary template.
+ *
+ * Argument 0 is the tree built by unaccent_init, argument 1 the input
+ * bytes and argument 2 their length.  The input is scanned one multibyte
+ * character at a time and every character that has a rule is replaced.
+ *
+ * Returns NULL if nothing was replaced.  Otherwise returns a two-element
+ * TSLexeme array (the second element zeroed) whose first lexeme is the
+ * NUL-terminated rewritten string, flagged TSL_FILTER.
+ */
+PG_FUNCTION_INFO_V1(unaccent_lexize);
+Datum       unaccent_lexize(PG_FUNCTION_ARGS);
+Datum
+unaccent_lexize(PG_FUNCTION_ARGS)
+{
+	SuffixChar *rootSuffixTree = (SuffixChar*)PG_GETARG_POINTER(0);
+	char       *srcchar = (char *) PG_GETARG_POINTER(1);
+	int32		len = PG_GETARG_INT32(2);
+	char	   *srcstart = srcchar;
+	int			charlen;
+	TSLexeme   *res = NULL;
+	SuffixChar *node;
+	StringInfoData buf;
+
+	/*
+	 * The output buffer is created only when the first replacement is
+	 * found.  It is a StringInfo because a replacement string may be
+	 * longer than the character it replaces, so no fixed bound derived
+	 * from the input length is safe.
+	 */
+	buf.data = NULL;
+
+	while( srcchar - srcstart < len )
+	{
+		charlen = pg_mblen(srcchar);
+
+		node = findReplaceTo( rootSuffixTree, (unsigned char *) srcchar, charlen );
+		if ( node && node->replaceTo )
+		{
+			if ( buf.data == NULL )
+			{
+				initStringInfo(&buf);
+				/* copy the unchanged prefix that was skipped so far */
+				if ( srcchar != srcstart )
+					appendBinaryStringInfo(&buf, srcstart, srcchar - srcstart);
+			}
+			appendBinaryStringInfo(&buf, node->replaceTo, node->replacelen);
+		}
+		else if ( buf.data != NULL )
+		{
+			/* no rule for this character: copy it through unchanged */
+			appendBinaryStringInfo(&buf, srcchar, charlen);
+		}
+
+		srcchar += charlen;
+	}
+
+	/* build a result only if at least one replacement was made */
+	if ( buf.data != NULL )
+	{
+		res = palloc0(sizeof(TSLexeme) * 2);
+		res->lexeme = buf.data;		/* StringInfo keeps it NUL-terminated */
+		res->flags = TSL_FILTER;
+	}
+
+	PG_RETURN_POINTER(res);
+}
+
+/*
+ * unaccent_dict - SQL-callable function wrapper around a dictionary.
+ *
+ * Called with one argument (the string), the dictionary named "unaccent"
+ * is looked up; called with two, argument 0 is the dictionary OID and
+ * argument 1 the string.  The dictionary's lexize method is run on the
+ * string.  If it returns nothing, or a result without a lexeme, a copy of
+ * the input string is returned; otherwise the first lexeme is returned
+ * as text.
+ */
+PG_FUNCTION_INFO_V1(unaccent_dict);
+Datum       unaccent_dict(PG_FUNCTION_ARGS);
+Datum
+unaccent_dict(PG_FUNCTION_ARGS)
+{
+	bool	hasDictArg = (PG_NARGS() != 1);
+	int		strArg = hasDictArg ? 1 : 0;
+	Oid		dictOid;
+	text	*str;
+	text	*result;
+	TSDictionaryCacheEntry	*dict;
+	TSLexeme *res;
+
+	if (hasDictArg)
+		dictOid = PG_GETARG_OID(0);
+	else
+		dictOid = TSDictionaryGetDictid(stringToQualifiedNameList("unaccent"), false);
+
+	str = PG_GETARG_TEXT_P(strArg);
+
+	dict = lookup_ts_dictionary_cache(dictOid);
+
+	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
+													 PointerGetDatum(dict->dictData),
+													 PointerGetDatum(VARDATA(str)),
+													 Int32GetDatum(VARSIZE(str) - VARHDRSZ),
+													 PointerGetDatum(NULL)));
+
+	PG_FREE_IF_COPY(str, strArg);
+
+	if ( res != NULL && res->lexeme != NULL )
+	{
+		/* the dictionary rewrote the string: hand back its lexeme */
+		result = cstring_to_text(res->lexeme);
+
+		pfree(res->lexeme);
+		pfree(res);
+	}
+	else
+	{
+		/* nothing usable came back: return the input unchanged */
+		if ( res != NULL )
+			pfree(res);
+		result = PG_GETARG_TEXT_P_COPY(strArg);
+	}
+
+	PG_RETURN_TEXT_P(result);
+}
diff --git a/contrib/unaccent/unaccent.rules b/contrib/unaccent/unaccent.rules
new file mode 100644
index 00000000000..cc2f7a65858
--- /dev/null
+++ b/contrib/unaccent/unaccent.rules
@@ -0,0 +1,187 @@
+À	A
+Á	A
+Â	A
+Ã	A
+Ä	A
+Å	A
+Æ	A
+à	a
+á	a
+â	a
+ã	a
+ä	a
+å	a
+æ	a
+Ā	A
+ā	a
+Ă	A
+ă	a
+Ą	A
+ą	a
+Ç	C
+ç	c
+Ć	C
+ć	c
+Ĉ	C
+ĉ	c
+Ċ	C
+ċ	c
+Č	C
+č	c
+Ď	D
+ď	d
+Đ	D
+đ	d
+È	E
+É	E
+Ê	E
+Ë	E
+è	e
+é	e
+ê	e
+ë	e
+Ē	E
+ē	e
+Ĕ	E
+ĕ	e
+Ė	E
+ė	e
+Ę	E
+ę	e
+Ě	E
+ě	e
+Ĝ	G
+ĝ	g
+Ğ	G
+ğ	g
+Ġ	G
+ġ	g
+Ģ	G
+ģ	g
+Ĥ	H
+ĥ	h
+Ħ	H
+ħ	h
+Ĩ	I
+Ì	I
+Í	I
+Î	I
+Ï	I
+ì	i
+í	i
+î	i
+ï	i
+ĩ	i
+Ī	I
+ī	i
+Ĭ	I
+ĭ	i
+Į	I
+į	i
+İ	I
+ı	i
+Ĳ	I
+ĳ	i
+Ĵ	J
+ĵ	j
+Ķ	K
+ķ	k
+ĸ	k
+Ĺ	L
+ĺ	l
+Ļ	L
+ļ	l
+Ľ	L
+ľ	l
+Ŀ	L
+ŀ	l
+Ł	L
+ł	l
+Ñ	N
+ñ	n
+Ń	N
+ń	n
+Ņ	N
+ņ	n
+Ň	N
+ň	n
+ŉ	n
+Ŋ	N
+ŋ	n
+Ò	O
+Ó	O
+Ô	O
+Õ	O
+Ö	O
+ò	o
+ó	o
+ô	o
+õ	o
+ö	o
+Ō	O
+ō	o
+Ŏ	O
+ŏ	o
+Ő	O
+ő	o
+Œ	E
+œ	e
+Ø	O
+ø	o
+Ŕ	R
+ŕ	r
+Ŗ	R
+ŗ	r
+Ř	R
+ř	r
+ß	S
+Ś	S
+ś	s
+Ŝ	S
+ŝ	s
+Ş	S
+ş	s
+Š	S
+š	s
+Ţ	T
+ţ	t
+Ť	T
+ť	t
+Ŧ	T
+ŧ	t
+Ù	U
+Ú	U
+Û	U
+Ü	U
+ù	u
+ú	u
+û	u
+ü	u
+Ũ	U
+ũ	u
+Ū	U
+ū	u
+Ŭ	U
+ŭ	u
+Ů	U
+ů	u
+Ű	U
+ű	u
+Ų	U
+ų	u
+Ŵ	W
+ŵ	w
+Ý	Y
+ý	y
+ÿ	y
+Ŷ	Y
+ŷ	y
+Ÿ	Y
+Ź	Z
+ź	z
+Ż	Z
+ż	z
+Ž	Z
+ž	z
+ё	е
+Ё	Е
diff --git a/contrib/unaccent/unaccent.sql.in b/contrib/unaccent/unaccent.sql.in
new file mode 100644
index 00000000000..ba981398faf
--- /dev/null
+++ b/contrib/unaccent/unaccent.sql.in
@@ -0,0 +1,33 @@
+/* $PostgreSQL: pgsql/contrib/unaccent/unaccent.sql.in,v 1.1 2009/08/18 10:34:39 teodor Exp $ */
+
+-- SQL-callable wrappers around the dictionary.  They are STABLE rather
+-- than IMMUTABLE because their result depends on the dictionary's RULES
+-- file and options, and the one-argument form also looks the dictionary
+-- up by name, so identical input does not always give identical output.
+CREATE OR REPLACE FUNCTION unaccent(regdictionary, text)
+	RETURNS text
+	AS 'MODULE_PATHNAME', 'unaccent_dict'
+	LANGUAGE C RETURNS NULL ON NULL INPUT STABLE;
+
+CREATE OR REPLACE FUNCTION unaccent(text)
+	RETURNS text
+	AS 'MODULE_PATHNAME', 'unaccent_dict'
+	LANGUAGE C RETURNS NULL ON NULL INPUT STABLE;
+
+-- Support functions for the text search template.
+CREATE OR REPLACE FUNCTION unaccent_init(internal)
+	RETURNS internal
+	AS 'MODULE_PATHNAME', 'unaccent_init'
+	LANGUAGE C;
+
+CREATE OR REPLACE FUNCTION unaccent_lexize(internal,internal,internal,internal)
+	RETURNS internal
+	AS 'MODULE_PATHNAME', 'unaccent_lexize'
+	LANGUAGE C;
+
+CREATE TEXT SEARCH TEMPLATE unaccent (
+	INIT = unaccent_init,
+	LEXIZE = unaccent_lexize
+);
+
+
+-- Default dictionary, reading its rules from unaccent.rules.
+CREATE TEXT SEARCH DICTIONARY unaccent (
+	TEMPLATE = unaccent,
+	RULES    = 'unaccent'
+);
+
diff --git a/contrib/unaccent/uninstall_unaccent.sql b/contrib/unaccent/uninstall_unaccent.sql
new file mode 100644
index 00000000000..89e3627fc8c
--- /dev/null
+++ b/contrib/unaccent/uninstall_unaccent.sql
@@ -0,0 +1,9 @@
+/* $PostgreSQL: pgsql/contrib/unaccent/uninstall_unaccent.sql,v 1.1 2009/08/18 10:34:39 teodor Exp $ */
+
+-- Remove everything created by unaccent.sql: first the SQL-callable
+-- wrappers, then the dictionary and its template, and finally the
+-- template's support functions.
+DROP FUNCTION IF EXISTS unaccent(regdictionary, text) CASCADE;
+DROP FUNCTION IF EXISTS unaccent(text) CASCADE;
+DROP TEXT SEARCH DICTIONARY IF EXISTS unaccent CASCADE;
+DROP TEXT SEARCH TEMPLATE IF EXISTS unaccent CASCADE;
+DROP FUNCTION IF EXISTS unaccent_init(internal) CASCADE;
+DROP FUNCTION IF EXISTS unaccent_lexize(internal,internal,internal,internal) CASCADE;
+
+
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index 0ef92b48968..cffbc55249c 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.13 2009/04/27 16:27:35 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/contrib.sgml,v 1.14 2009/08/18 10:34:39 teodor Exp $ -->
 
 <appendix id="contrib">
  <title>Additional Supplied Modules</title>
@@ -113,6 +113,7 @@ psql -d dbname -f <replaceable>SHAREDIR</>/contrib/<replaceable>module</>.sql
  &tablefunc;
  &test-parser;
  &tsearch2;
+ &unaccent;
  &uuid-ossp;
  &vacuumlo;
  &xml2;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 7e194f7bccb..bee66008b66 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.63 2009/08/17 22:14:44 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/filelist.sgml,v 1.64 2009/08/18 10:34:39 teodor Exp $ -->
 
 <!entity history    SYSTEM "history.sgml">
 <!entity info       SYSTEM "info.sgml">
@@ -126,6 +126,7 @@
 <!entity tablefunc       SYSTEM "tablefunc.sgml">
 <!entity test-parser     SYSTEM "test-parser.sgml">
 <!entity tsearch2        SYSTEM "tsearch2.sgml">
+<!entity unaccent      SYSTEM "unaccent.sgml">
 <!entity uuid-ossp       SYSTEM "uuid-ossp.sgml">
 <!entity vacuumlo        SYSTEM "vacuumlo.sgml">
 <!entity xml2            SYSTEM "xml2.sgml"> 
diff --git a/doc/src/sgml/unaccent.sgml b/doc/src/sgml/unaccent.sgml
new file mode 100644
index 00000000000..b3c7bbee489
--- /dev/null
+++ b/doc/src/sgml/unaccent.sgml
@@ -0,0 +1,150 @@
+<sect1 id="unaccent">
+ <title>unaccent</title>
+
+ <indexterm zone="unaccent">
+  <primary>unaccent</primary>
+ </indexterm>
+
+ <para>
+  <filename>unaccent</> removes accents (diacritic signs) from a lexeme.
+  It is a filtering dictionary, which means that its output is 
+  always passed to the next dictionary (if any), contrary to the standard 
+  behavior. Currently, it supports the most important accents from European 
+  languages. 
+ </para>
+
+ <para>
+  Limitation: the current implementation of the <filename>unaccent</> 
+  dictionary cannot be used as a normalizing dictionary for the 
+  <filename>thesaurus</filename> dictionary.
+ </para>
+ 
+ <sect2>
+  <title>Configuration</title>
+
+  <para>
+   An <literal>unaccent</> dictionary accepts the following options:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     <literal>RULES</> is the base name of the file containing the list of
+     translation rules.  This file must be stored in
+     <filename>$SHAREDIR/tsearch_data/</> (where <literal>$SHAREDIR</> means
+     the <productname>PostgreSQL</> installation's shared-data directory).
+     Its name must end in <literal>.rules</> (which is not to be included in
+     the <literal>RULES</> parameter).
+    </para>
+   </listitem>
+  </itemizedlist>
+  <para>
+   The rules file has the following format:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     Each line represents a pair: character_with_accent  character_without_accent
+    <programlisting>
+&Agrave;	A
+&Aacute; 	A
+&Acirc; 	A
+&Atilde;	A
+&Auml;  	A
+&Aring;		A
+&AElig; 	A
+    </programlisting>
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+   Look at <filename>unaccent.rules</>, which is installed in
+   <filename>$SHAREDIR/tsearch_data/</>, for an example.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Usage</title>
+
+  <para>
+   Running the installation script creates a text search template
+   <literal>unaccent</> and a dictionary <literal>unaccent</>
+   based on it, with default parameters.  You can alter the
+   parameters, for example
+
+<programlisting>
+=# ALTER TEXT SEARCH DICTIONARY unaccent (RULES='my_rules');
+</programlisting>
+
+   or create new dictionaries based on the template.
+  </para>
+
+  <para>
+   To test the dictionary, you can try
+
+<programlisting>
+=# select ts_lexize('unaccent','Hôtel');
+ ts_lexize 
+-----------
+ {Hotel}
+(1 row)
+</programlisting>
+  </para>
+  
+  <para>
+  Filtering dictionaries are useful for the correct operation of the 
+  <function>ts_headline</function> function.
+<programlisting>
+=# CREATE TEXT SEARCH CONFIGURATION fr ( COPY = french );
+=# ALTER TEXT SEARCH CONFIGURATION fr
+	ALTER MAPPING FOR hword, hword_part, word
+	WITH unaccent, french_stem;
+=# select to_tsvector('fr','Hôtels de la Mer');
+    to_tsvector    
+-------------------
+ 'hotel':1 'mer':4
+(1 row)
+
+=# select to_tsvector('fr','Hôtel de la Mer') @@ to_tsquery('fr','Hotels');
+ ?column? 
+----------
+ t
+(1 row)
+=# select ts_headline('fr','Hôtel de la Mer',to_tsquery('fr','Hotels'));
+      ts_headline       
+------------------------
+ &lt;b&gt;Hôtel&lt;/b&gt; de la Mer
+(1 row)
+
+</programlisting>
+  </para>
+ </sect2>
+
+ <sect2>
+ <title>Function</title>
+
+ <para>
+  The <function>unaccent</> function removes accents (diacritic signs) from
+  the argument string. Basically, it's a wrapper around the 
+  <filename>unaccent</> dictionary.
+ </para>
+
+ <indexterm>
+  <primary>unaccent</primary>
+ </indexterm>
+
+ <synopsis>
+   unaccent(<optional><replaceable class="PARAMETER">dictionary</replaceable>,
+   </optional> <replaceable class="PARAMETER">string</replaceable>) 
+  returns <type>text</type>
+ </synopsis>  
+
+ <para>
+<programlisting>
+SELECT unaccent('unaccent','Hôtel');
+SELECT unaccent('Hôtel');
+</programlisting>
+ </para>
+ </sect2>
+
+</sect1>