diff options
Diffstat (limited to 'src/test/regress/sql/collate.icu.utf8.sql')
-rw-r--r-- | src/test/regress/sql/collate.icu.utf8.sql | 55 |
1 files changed, 52 insertions, 3 deletions
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 0c9491c260e..797e93ac714 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -514,6 +514,12 @@ CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong! CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true); CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false); +SELECT 'abc' LIKE 'abc' COLLATE ctest_det; +SELECT 'abc' LIKE 'a\bc' COLLATE ctest_det; + +SELECT 'abc' LIKE 'abc' COLLATE ctest_nondet; +SELECT 'abc' LIKE 'a\bc' COLLATE ctest_nondet; + CREATE TABLE test6 (a int, b text); -- same string in different normal forms INSERT INTO test6 VALUES (1, U&'\00E4bc'); @@ -522,6 +528,9 @@ SELECT * FROM test6; SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; +SELECT * FROM test6 WHERE b LIKE 'äbc' COLLATE ctest_det; +SELECT * FROM test6 WHERE b LIKE 'äbc' COLLATE ctest_nondet; + -- same with arrays CREATE TABLE test6a (a int, b text[]); INSERT INTO test6a VALUES (1, ARRAY[U&'\00E4bc']); @@ -637,14 +646,14 @@ SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b') -- This tests the issue described in match_pattern_prefix(). In the -- absence of that check, the case_insensitive tests below would -- return no rows where they should logically return one. -CREATE TABLE test4c (x text COLLATE "C"); +CREATE TABLE test4c (x text COLLATE case_insensitive); INSERT INTO test4c VALUES ('abc'); CREATE INDEX ON test4c (x); SET enable_seqscan = off; SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive; -- ok, no rows SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive; -- ok, no rows -SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- error -SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- error +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- ok +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- ok RESET enable_seqscan; -- Unicode special case: different variants of Greek lower case sigma. @@ -687,6 +696,46 @@ SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; +-- This is a tricky one. A naive implementation would first test +-- \00E4 matches \0061, which is true under ignore_accents, but then +-- the rest of the string won't match anymore. Therefore, the +-- algorithm has to test whether the rest of the string matches, and +-- if not try matching \00E4 against a longer substring like +-- \0061\0308, which will then work out. +SELECT U&'\0061\0308bc' LIKE U&'\00E4_c' COLLATE ignore_accents; +-- and in reverse: +SELECT U&'\00E4bc' LIKE U&'\0061\0308_c' COLLATE ignore_accents; +-- inner % matches b: +SELECT U&'\0061\0308bc' LIKE U&'\00E4%c' COLLATE ignore_accents; +-- inner %% matches b then zero: +SELECT U&'\0061\0308bc' LIKE U&'\00E4%%c' COLLATE ignore_accents; +-- inner %% matches b then zero: +SELECT U&'cb\0061\0308' LIKE U&'c%%\00E4' COLLATE ignore_accents; +-- trailing _ matches two codepoints that form one grapheme: +SELECT U&'cb\0061\0308' LIKE U&'cb_' COLLATE ignore_accents; +-- trailing __ matches two codepoints that form one grapheme: +SELECT U&'cb\0061\0308' LIKE U&'cb__' COLLATE ignore_accents; +-- leading % matches zero: +SELECT U&'\0061\0308bc' LIKE U&'%\00E4bc' COLLATE ignore_accents; +-- leading % matches zero (with later %): +SELECT U&'\0061\0308bc' LIKE U&'%\00E4%c' COLLATE ignore_accents; +-- trailing % matches zero: +SELECT U&'\0061\0308bc' LIKE U&'\00E4bc%' COLLATE ignore_accents; +-- trailing % matches zero (with previous %): +SELECT U&'\0061\0308bc' LIKE U&'\00E4%c%' COLLATE ignore_accents; +-- _ versus two codepoints that form one grapheme: +SELECT U&'\0061\0308bc' LIKE U&'_bc' COLLATE ignore_accents; +-- (actually this matches because) +SELECT U&'\0308bc' = 'bc' COLLATE ignore_accents; +-- __ matches two codepoints that form one grapheme: +SELECT U&'\0061\0308bc' LIKE U&'__bc' COLLATE ignore_accents; +-- _ matches one codepoint that forms half a grapheme: +SELECT U&'\0061\0308bc' LIKE U&'_\0308bc' COLLATE ignore_accents; +-- doesn't match because \00e4 doesn't match only \0308 +SELECT U&'\0061\0308bc' LIKE U&'_\00e4bc' COLLATE ignore_accents; +-- escape character at end of pattern +SELECT 'foox' LIKE 'foo\' COLLATE ignore_accents; + -- foreign keys (mixing different nondeterministic collations not allowed) CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); -- error |