aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2007-09-11 11:54:42 +0000
committerTeodor Sigaev <teodor@sigaev.ru>2007-09-11 11:54:42 +0000
commit64def09592535dc043741fb46f16eb37e152d90f (patch)
tree5b8fcb4790bac14c628d6abf42da5265519fe4d7 /src
parentc4b2b2960aad6fc292dfaec330ebe41fead8ab30 (diff)
downloadpostgresql-64def09592535dc043741fb46f16eb37e152d90f.tar.gz
postgresql-64def09592535dc043741fb46f16eb37e152d90f.zip
Add regression tests for ispell, synonym and thesaurus dictionaries.
Rename synonym.syn.sample and thesaurus.ths.sample to synonym_sample.syn and thesaurus_sample.ths, respectively, so that they can be used in the regression tests. The Ispell dictionary uses simple synthetic dictionary files.
Diffstat (limited to 'src')
-rw-r--r--src/backend/tsearch/Makefile5
-rw-r--r--src/backend/tsearch/hunspell_sample.affix24
-rw-r--r--src/backend/tsearch/ispell_sample.affix26
-rw-r--r--src/backend/tsearch/ispell_sample.dict8
-rw-r--r--src/backend/tsearch/synonym.syn.sample3
-rw-r--r--src/backend/tsearch/synonym_sample.syn4
-rw-r--r--src/backend/tsearch/thesaurus_sample.ths (renamed from src/backend/tsearch/thesaurus.ths.sample)10
-rw-r--r--src/test/regress/expected/tsdicts.out320
-rw-r--r--src/test/regress/parallel_schedule4
-rw-r--r--src/test/regress/serial_schedule3
-rw-r--r--src/test/regress/sql/tsdicts.sql121
11 files changed, 514 insertions, 14 deletions
diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile
index d5076ef18d4..720a5366677 100644
--- a/src/backend/tsearch/Makefile
+++ b/src/backend/tsearch/Makefile
@@ -4,7 +4,7 @@
#
# Copyright (c) 2006-2007, PostgreSQL Global Development Group
#
-# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.2 2007/08/22 06:11:56 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.3 2007/09/11 11:54:42 teodor Exp $
#
#-------------------------------------------------------------------------
subdir = src/backend/tsearch
@@ -13,7 +13,8 @@ include $(top_builddir)/src/Makefile.global
DICTDIR=tsearch_data
-DICTFILES=synonym.syn.sample thesaurus.ths.sample
+DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \
+ ispell_sample.affix ispell_sample.dict
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \
diff --git a/src/backend/tsearch/hunspell_sample.affix b/src/backend/tsearch/hunspell_sample.affix
new file mode 100644
index 00000000000..d1984c295fb
--- /dev/null
+++ b/src/backend/tsearch/hunspell_sample.affix
@@ -0,0 +1,24 @@
+COMPOUNDFLAG Z
+ONLYINCOMPOUND L
+
+PFX B Y 1
+PFX B 0 re .
+
+PFX U N 1
+PFX U 0 un .
+
+SFX J Y 1
+SFX J 0 INGS [^E]
+
+SFX G Y 1
+SFX G 0 ING [^E]
+
+SFX S Y 1
+SFX S 0 S [^SXZHY]
+
+SFX A Y 1
+SFX A Y IES [^AEIOU]Y
+
+SFX \ N 1
+SFX \ 0 Y/L [^Y]
+
diff --git a/src/backend/tsearch/ispell_sample.affix b/src/backend/tsearch/ispell_sample.affix
new file mode 100644
index 00000000000..f29004ff1da
--- /dev/null
+++ b/src/backend/tsearch/ispell_sample.affix
@@ -0,0 +1,26 @@
+compoundwords controlled Z
+
+prefixes
+
+flag *B:
+ . > RE # As in enter > reenter
+
+flag U:
+ . > UN # As in natural > unnatural
+
+suffixes
+
+flag *J:
+ [^E] > INGS # As in cross > crossings
+
+flag *G:
+ [^E] > ING # As in cross > crossing
+
+flag *S:
+ [^SXZHY] > S # As in bat > bats
+
+flag *A:
+ [^AEIOU]Y > -Y,IES # As in imply > implies
+
+flag ~\\:
+ [^Y] > Y #~ advarsel > advarsely-
diff --git a/src/backend/tsearch/ispell_sample.dict b/src/backend/tsearch/ispell_sample.dict
new file mode 100644
index 00000000000..44df1967a6c
--- /dev/null
+++ b/src/backend/tsearch/ispell_sample.dict
@@ -0,0 +1,8 @@
+book/GJUS
+booking/SB
+footballklubber
+foot/ZS
+football/Z
+ball/SZ\
+klubber/Z
+sky/A
diff --git a/src/backend/tsearch/synonym.syn.sample b/src/backend/tsearch/synonym.syn.sample
deleted file mode 100644
index fdccca102b4..00000000000
--- a/src/backend/tsearch/synonym.syn.sample
+++ /dev/null
@@ -1,3 +0,0 @@
-skies sky
-booking book
-bookings book
diff --git a/src/backend/tsearch/synonym_sample.syn b/src/backend/tsearch/synonym_sample.syn
new file mode 100644
index 00000000000..4e2eaeec0c1
--- /dev/null
+++ b/src/backend/tsearch/synonym_sample.syn
@@ -0,0 +1,4 @@
+postgres pgsql
+postgresql pgsql
+postgre pgsql
+gogle googl
diff --git a/src/backend/tsearch/thesaurus.ths.sample b/src/backend/tsearch/thesaurus_sample.ths
index 7e7702e2ae4..b83d8f1452a 100644
--- a/src/backend/tsearch/thesaurus.ths.sample
+++ b/src/backend/tsearch/thesaurus_sample.ths
@@ -11,10 +11,8 @@ one two : *12
one : *1
two : *2
-#foo bar : blah blah
-#f bar : fbar
-#e bar : ebar
-#g bar bar : gbarbar
-#asd:sdffff
-#qwerty:qwer wert erty
+supernovae stars : *sn
+supernovae : *sn
+booking tickets : order invitation cards
+booking the tickets : order invitation Cards
diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out
new file mode 100644
index 00000000000..859c7bb5226
--- /dev/null
+++ b/src/test/regress/expected/tsdicts.out
@@ -0,0 +1,320 @@
+--Test text search dictionaries and configurations
+-- Test ISpell dictionary with ispell affix file
+CREATE TEXT SEARCH DICTIONARY ispell (
+ Template=ispell,
+ DictFile=ispell_sample,
+ AffFile=ispell_sample
+);
+SELECT ts_lexize('ispell', 'skies');
+ ts_lexize
+-----------
+ {sky}
+(1 row)
+
+SELECT ts_lexize('ispell', 'bookings');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('ispell', 'booking');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('ispell', 'foot');
+ ts_lexize
+-----------
+ {foot}
+(1 row)
+
+SELECT ts_lexize('ispell', 'foots');
+ ts_lexize
+-----------
+ {foot}
+(1 row)
+
+SELECT ts_lexize('ispell', 'rebookings');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('ispell', 'rebooking');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('ispell', 'rebook');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT ts_lexize('ispell', 'unbookings');
+ ts_lexize
+-----------
+ {book}
+(1 row)
+
+SELECT ts_lexize('ispell', 'unbooking');
+ ts_lexize
+-----------
+ {book}
+(1 row)
+
+SELECT ts_lexize('ispell', 'unbook');
+ ts_lexize
+-----------
+ {book}
+(1 row)
+
+SELECT ts_lexize('ispell', 'footklubber');
+ ts_lexize
+----------------
+ {foot,klubber}
+(1 row)
+
+SELECT ts_lexize('ispell', 'footballklubber');
+ ts_lexize
+------------------------------------------------------
+ {footballklubber,foot,ball,klubber,football,klubber}
+(1 row)
+
+SELECT ts_lexize('ispell', 'ballyklubber');
+ ts_lexize
+----------------
+ {ball,klubber}
+(1 row)
+
+SELECT ts_lexize('ispell', 'footballyklubber');
+ ts_lexize
+---------------------
+ {foot,ball,klubber}
+(1 row)
+
+-- Test ISpell dictionary with hunspell affix file
+CREATE TEXT SEARCH DICTIONARY hunspell (
+ Template=ispell,
+ DictFile=ispell_sample,
+ AffFile=hunspell_sample
+);
+SELECT ts_lexize('hunspell', 'skies');
+ ts_lexize
+-----------
+ {sky}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'bookings');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'booking');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'foot');
+ ts_lexize
+-----------
+ {foot}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'foots');
+ ts_lexize
+-----------
+ {foot}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'rebookings');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'rebooking');
+ ts_lexize
+----------------
+ {booking,book}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'rebook');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT ts_lexize('hunspell', 'unbookings');
+ ts_lexize
+-----------
+ {book}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'unbooking');
+ ts_lexize
+-----------
+ {book}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'unbook');
+ ts_lexize
+-----------
+ {book}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'footklubber');
+ ts_lexize
+----------------
+ {foot,klubber}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'footballklubber');
+ ts_lexize
+------------------------------------------------------
+ {footballklubber,foot,ball,klubber,football,klubber}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'ballyklubber');
+ ts_lexize
+----------------
+ {ball,klubber}
+(1 row)
+
+SELECT ts_lexize('hunspell', 'footballyklubber');
+ ts_lexize
+---------------------
+ {foot,ball,klubber}
+(1 row)
+
+-- Synonim dictionary
+CREATE TEXT SEARCH DICTIONARY synonym (
+ Template=synonym,
+ Synonyms=synonym_sample
+);
+SELECT ts_lexize('synonym', 'PoStGrEs');
+ ts_lexize
+-----------
+ {pgsql}
+(1 row)
+
+SELECT ts_lexize('synonym', 'Gogle');
+ ts_lexize
+-----------
+ {googl}
+(1 row)
+
+-- Create and simple test thesaurus dictionary
+-- More test in configuration checks because of ts_lexize
+-- can not give more tat one word as it may wish thesaurus.
+CREATE TEXT SEARCH DICTIONARY thesaurus (
+ Template=thesaurus,
+ DictFile=thesaurus_sample,
+ Dictionary=english_stem
+);
+NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8)
+SELECT ts_lexize('thesaurus', 'one');
+NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8)
+ ts_lexize
+-----------
+ {1}
+(1 row)
+
+-- Test ispell dictionary in configuration
+CREATE TEXT SEARCH CONFIGURATION ispell_tst (
+ COPY=english
+);
+ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
+ hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word
+ WITH ispell, english_stem;
+SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+ to_tsvector
+----------------------------------------------------------------------------------------------------
+ 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
+(1 row)
+
+SELECT to_tsquery('ispell_tst', 'footballklubber');
+ to_tsquery
+------------------------------------------------------------------------------
+ ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+(1 row)
+
+SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
+ to_tsquery
+------------------------------------------------------------------------
+ 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
+(1 row)
+
+-- Test ispell dictionary with hunspell affix in configuration
+CREATE TEXT SEARCH CONFIGURATION hunspell_tst (
+ COPY=ispell_tst
+);
+ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
+ REPLACE ispell WITH hunspell;
+SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+ to_tsvector
+----------------------------------------------------------------------------------------------------
+ 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
+(1 row)
+
+SELECT to_tsquery('hunspell_tst', 'footballklubber');
+ to_tsquery
+------------------------------------------------------------------------------
+ ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+(1 row)
+
+SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+ to_tsquery
+------------------------------------------------------------------------
+ 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
+(1 row)
+
+-- Test synonym dictionary in configuration
+CREATE TEXT SEARCH CONFIGURATION synonym_tst (
+ COPY=english
+);
+ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
+ lword, lpart_hword, lhword
+ WITH synonym, english_stem;
+SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre');
+ to_tsvector
+---------------------------------------------------
+ 'call':4 'often':3 'pgsql':1,6,8,12 'pronounc':10
+(1 row)
+
+SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
+ to_tsvector
+----------------------------------------------------------
+ 'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8
+(1 row)
+
+-- test thesaurus in configuration
+-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
+CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
+ COPY=synonym_tst
+);
+ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR
+ lword, lpart_hword, lhword
+ WITH synonym, thesaurus, english_stem;
+SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
+ to_tsvector
+----------------------------------
+ '1':1,5 '12':3 '123':4 'pgsql':2
+(1 row)
+
+SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
+ to_tsvector
+-------------------------------------------------------------
+ 'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10
+(1 row)
+
+SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
+ to_tsvector
+-------------------------------------------------------
+ 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8
+(1 row)
+
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index a8f5c799d9a..4d5af5b16d5 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -1,5 +1,5 @@
# ----------
-# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.43 2007/08/21 01:11:30 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.44 2007/09/11 11:54:42 teodor Exp $
#
# By convention, we put no more than twenty tests in any one parallel group;
# this limits the number of connections needed to run the tests.
@@ -77,7 +77,7 @@ test: misc
# ----------
# Another group of parallel tests
# ----------
-test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch
+test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch tsdicts
# ----------
# Another group of parallel tests
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index a11a8702334..856682469cc 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.40 2007/08/21 01:11:30 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.41 2007/09/11 11:54:42 teodor Exp $
# This should probably be in an order similar to parallel_schedule.
test: boolean
test: char
@@ -102,6 +102,7 @@ test: rangefuncs
test: prepare
test: without_oid
test: conversion
+test: tsdicts
test: truncate
test: alter_table
test: sequence
diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql
new file mode 100644
index 00000000000..2e6cf791d87
--- /dev/null
+++ b/src/test/regress/sql/tsdicts.sql
@@ -0,0 +1,121 @@
+--Test text search dictionaries and configurations
+
+-- Test ISpell dictionary with ispell affix file
+CREATE TEXT SEARCH DICTIONARY ispell (
+ Template=ispell,
+ DictFile=ispell_sample,
+ AffFile=ispell_sample
+);
+
+SELECT ts_lexize('ispell', 'skies');
+SELECT ts_lexize('ispell', 'bookings');
+SELECT ts_lexize('ispell', 'booking');
+SELECT ts_lexize('ispell', 'foot');
+SELECT ts_lexize('ispell', 'foots');
+SELECT ts_lexize('ispell', 'rebookings');
+SELECT ts_lexize('ispell', 'rebooking');
+SELECT ts_lexize('ispell', 'rebook');
+SELECT ts_lexize('ispell', 'unbookings');
+SELECT ts_lexize('ispell', 'unbooking');
+SELECT ts_lexize('ispell', 'unbook');
+
+SELECT ts_lexize('ispell', 'footklubber');
+SELECT ts_lexize('ispell', 'footballklubber');
+SELECT ts_lexize('ispell', 'ballyklubber');
+SELECT ts_lexize('ispell', 'footballyklubber');
+
+-- Test ISpell dictionary with hunspell affix file
+CREATE TEXT SEARCH DICTIONARY hunspell (
+ Template=ispell,
+ DictFile=ispell_sample,
+ AffFile=hunspell_sample
+);
+
+SELECT ts_lexize('hunspell', 'skies');
+SELECT ts_lexize('hunspell', 'bookings');
+SELECT ts_lexize('hunspell', 'booking');
+SELECT ts_lexize('hunspell', 'foot');
+SELECT ts_lexize('hunspell', 'foots');
+SELECT ts_lexize('hunspell', 'rebookings');
+SELECT ts_lexize('hunspell', 'rebooking');
+SELECT ts_lexize('hunspell', 'rebook');
+SELECT ts_lexize('hunspell', 'unbookings');
+SELECT ts_lexize('hunspell', 'unbooking');
+SELECT ts_lexize('hunspell', 'unbook');
+
+SELECT ts_lexize('hunspell', 'footklubber');
+SELECT ts_lexize('hunspell', 'footballklubber');
+SELECT ts_lexize('hunspell', 'ballyklubber');
+SELECT ts_lexize('hunspell', 'footballyklubber');
+
+-- Synonim dictionary
+CREATE TEXT SEARCH DICTIONARY synonym (
+ Template=synonym,
+ Synonyms=synonym_sample
+);
+
+SELECT ts_lexize('synonym', 'PoStGrEs');
+SELECT ts_lexize('synonym', 'Gogle');
+
+-- Create and simple test thesaurus dictionary
+-- More test in configuration checks because of ts_lexize
+-- can not give more tat one word as it may wish thesaurus.
+CREATE TEXT SEARCH DICTIONARY thesaurus (
+ Template=thesaurus,
+ DictFile=thesaurus_sample,
+ Dictionary=english_stem
+);
+
+SELECT ts_lexize('thesaurus', 'one');
+
+-- Test ispell dictionary in configuration
+CREATE TEXT SEARCH CONFIGURATION ispell_tst (
+ COPY=english
+);
+
+ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
+ hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word
+ WITH ispell, english_stem;
+
+SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+SELECT to_tsquery('ispell_tst', 'footballklubber');
+SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
+
+-- Test ispell dictionary with hunspell affix in configuration
+CREATE TEXT SEARCH CONFIGURATION hunspell_tst (
+ COPY=ispell_tst
+);
+
+ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
+ REPLACE ispell WITH hunspell;
+
+SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+SELECT to_tsquery('hunspell_tst', 'footballklubber');
+SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+
+-- Test synonym dictionary in configuration
+CREATE TEXT SEARCH CONFIGURATION synonym_tst (
+ COPY=english
+);
+
+ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
+ lword, lpart_hword, lhword
+ WITH synonym, english_stem;
+
+SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre');
+SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
+
+-- test thesaurus in configuration
+-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
+CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
+ COPY=synonym_tst
+);
+
+ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR
+ lword, lpart_hword, lhword
+ WITH synonym, thesaurus, english_stem;
+
+SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
+SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
+SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
+