aboutsummaryrefslogtreecommitdiff
path: root/src/test/regress/sql/collate.icu.utf8.sql
diff options
context:
space:
mode:
Diffstat (limited to 'src/test/regress/sql/collate.icu.utf8.sql')
-rw-r--r--src/test/regress/sql/collate.icu.utf8.sql55
1 files changed, 52 insertions, 3 deletions
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 0c9491c260e..797e93ac714 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -514,6 +514,12 @@ CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!
CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);
CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false);
+SELECT 'abc' LIKE 'abc' COLLATE ctest_det;
+SELECT 'abc' LIKE 'a\bc' COLLATE ctest_det;
+
+SELECT 'abc' LIKE 'abc' COLLATE ctest_nondet;
+SELECT 'abc' LIKE 'a\bc' COLLATE ctest_nondet;
+
CREATE TABLE test6 (a int, b text);
-- same string in different normal forms
INSERT INTO test6 VALUES (1, U&'\00E4bc');
@@ -522,6 +528,9 @@ SELECT * FROM test6;
SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det;
SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet;
+SELECT * FROM test6 WHERE b LIKE 'äbc' COLLATE ctest_det;
+SELECT * FROM test6 WHERE b LIKE 'äbc' COLLATE ctest_nondet;
+
-- same with arrays
CREATE TABLE test6a (a int, b text[]);
INSERT INTO test6a VALUES (1, ARRAY[U&'\00E4bc']);
@@ -637,14 +646,14 @@ SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b')
-- This tests the issue described in match_pattern_prefix(). In the
-- absence of that check, the case_insensitive tests below would
-- return no rows where they should logically return one.
-CREATE TABLE test4c (x text COLLATE "C");
+CREATE TABLE test4c (x text COLLATE case_insensitive);
INSERT INTO test4c VALUES ('abc');
CREATE INDEX ON test4c (x);
SET enable_seqscan = off;
SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive; -- ok, no rows
SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive; -- ok, no rows
-SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- error
-SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- error
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- ok
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- ok
RESET enable_seqscan;
-- Unicode special case: different variants of Greek lower case sigma.
@@ -687,6 +696,46 @@ SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents;
SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive
SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive;
+-- This is a tricky one. A naive implementation would first test
+-- \00E4 matches \0061, which is true under ignore_accents, but then
+-- the rest of the string won't match anymore. Therefore, the
+-- algorithm has to test whether the rest of the string matches, and
+-- if not try matching \00E4 against a longer substring like
+-- \0061\0308, which will then work out.
+SELECT U&'\0061\0308bc' LIKE U&'\00E4_c' COLLATE ignore_accents;
+-- and in reverse:
+SELECT U&'\00E4bc' LIKE U&'\0061\0308_c' COLLATE ignore_accents;
+-- inner % matches b:
+SELECT U&'\0061\0308bc' LIKE U&'\00E4%c' COLLATE ignore_accents;
+-- inner %% matches b then zero:
+SELECT U&'\0061\0308bc' LIKE U&'\00E4%%c' COLLATE ignore_accents;
+-- inner %% matches b then zero:
+SELECT U&'cb\0061\0308' LIKE U&'c%%\00E4' COLLATE ignore_accents;
+-- trailing _ matches two codepoints that form one grapheme:
+SELECT U&'cb\0061\0308' LIKE U&'cb_' COLLATE ignore_accents;
+-- trailing __ matches two codepoints that form one grapheme:
+SELECT U&'cb\0061\0308' LIKE U&'cb__' COLLATE ignore_accents;
+-- leading % matches zero:
+SELECT U&'\0061\0308bc' LIKE U&'%\00E4bc' COLLATE ignore_accents;
+-- leading % matches zero (with later %):
+SELECT U&'\0061\0308bc' LIKE U&'%\00E4%c' COLLATE ignore_accents;
+-- trailing % matches zero:
+SELECT U&'\0061\0308bc' LIKE U&'\00E4bc%' COLLATE ignore_accents;
+-- trailing % matches zero (with previous %):
+SELECT U&'\0061\0308bc' LIKE U&'\00E4%c%' COLLATE ignore_accents;
+-- _ versus two codepoints that form one grapheme:
+SELECT U&'\0061\0308bc' LIKE U&'_bc' COLLATE ignore_accents;
+-- (actually this matches because)
+SELECT U&'\0308bc' = 'bc' COLLATE ignore_accents;
+-- __ matches two codepoints that form one grapheme:
+SELECT U&'\0061\0308bc' LIKE U&'__bc' COLLATE ignore_accents;
+-- _ matches one codepoint that forms half a grapheme:
+SELECT U&'\0061\0308bc' LIKE U&'_\0308bc' COLLATE ignore_accents;
+-- doesn't match because \00e4 doesn't match only \0308
+SELECT U&'\0061\0308bc' LIKE U&'_\00e4bc' COLLATE ignore_accents;
+-- escape character at end of pattern
+SELECT 'foox' LIKE 'foo\' COLLATE ignore_accents;
+
-- foreign keys (mixing different nondeterministic collations not allowed)
CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY);
CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); -- error