about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- contrib/citext/Makefile 2
-rw-r--r-- contrib/citext/expected/citext.out 29
-rw-r--r-- contrib/citext/expected/citext_1.out 29
-rw-r--r-- contrib/citext/expected/citext_utf8.out 146
-rw-r--r-- contrib/citext/expected/citext_utf8_1.out 9
-rw-r--r-- contrib/citext/sql/citext.sql 35
-rw-r--r-- contrib/citext/sql/citext_utf8.sql 51
7 files changed, 207 insertions, 94 deletions
diff --git a/contrib/citext/Makefile b/contrib/citext/Makefile
index a7de52928d7..789932fe366 100644
--- a/contrib/citext/Makefile
+++ b/contrib/citext/Makefile
@@ -11,7 +11,7 @@ DATA = citext--1.4.sql \
citext--1.0--1.1.sql
PGFILEDESC = "citext - case-insensitive character string data type"
-REGRESS = citext
+REGRESS = citext citext_utf8
ifdef USE_PGXS
PG_CONFIG = pg_config
diff --git a/contrib/citext/expected/citext.out b/contrib/citext/expected/citext.out
index 3bac0534fb8..5afcc50920e 100644
--- a/contrib/citext/expected/citext.out
+++ b/contrib/citext/expected/citext.out
@@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t;
t
(1 row)
--- Multibyte sanity tests. Uncomment to run.
--- SELECT 'À'::citext = 'À'::citext AS t;
--- SELECT 'À'::citext = 'à'::citext AS t;
--- SELECT 'À'::text = 'à'::text AS f; -- text wins.
--- SELECT 'À'::citext <> 'B'::citext AS t;
--- Test combining characters making up canonically equivalent strings.
--- SELECT 'Ä'::text <> 'Ä'::text AS t;
--- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
--- Test the Turkish dotted I. The lowercase is a single byte while the
--- uppercase is multibyte. This is why the comparison code can't be optimized
--- to compare string lengths.
--- SELECT 'i'::citext = 'İ'::citext AS t;
--- Regression.
--- SELECT 'láska'::citext <> 'laská'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
--- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
-- Test > and >=
SELECT 'B'::citext > 'a'::citext AS t;
t
@@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
t
(1 row)
--- Multi-byte tests below are disabled like the sanity tests above.
--- Uncomment to run them.
-- Test ~<~ and ~<=~
SELECT 'a'::citext ~<~ 'B'::citext AS t;
t
@@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f;
f
(1 row)
--- SELECT 'à'::citext ~<~ 'À'::citext AS f;
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
t
---
@@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t;
t
(1 row)
--- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
-- Test ~>~ and ~>=~
SELECT 'B'::citext ~>~ 'a'::citext AS t;
t
@@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t;
t
(1 row)
--- SELECT 'à'::citext ~>~ 'À'::citext AS f;
SELECT 'B'::citext ~>~ 'b'::citext AS f;
f
---
@@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t;
t
(1 row)
--- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
-- Test implicit casting. citext casts to text, but not vice-versa.
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.
t
diff --git a/contrib/citext/expected/citext_1.out b/contrib/citext/expected/citext_1.out
index 57fc863f7a5..8aa2b9e1dbc 100644
--- a/contrib/citext/expected/citext_1.out
+++ b/contrib/citext/expected/citext_1.out
@@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t;
t
(1 row)
--- Multibyte sanity tests. Uncomment to run.
--- SELECT 'À'::citext = 'À'::citext AS t;
--- SELECT 'À'::citext = 'à'::citext AS t;
--- SELECT 'À'::text = 'à'::text AS f; -- text wins.
--- SELECT 'À'::citext <> 'B'::citext AS t;
--- Test combining characters making up canonically equivalent strings.
--- SELECT 'Ä'::text <> 'Ä'::text AS t;
--- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
--- Test the Turkish dotted I. The lowercase is a single byte while the
--- uppercase is multibyte. This is why the comparison code can't be optimized
--- to compare string lengths.
--- SELECT 'i'::citext = 'İ'::citext AS t;
--- Regression.
--- SELECT 'láska'::citext <> 'laská'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
--- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
-- Test > and >=
SELECT 'B'::citext > 'a'::citext AS t;
t
@@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
t
(1 row)
--- Multi-byte tests below are disabled like the sanity tests above.
--- Uncomment to run them.
-- Test ~<~ and ~<=~
SELECT 'a'::citext ~<~ 'B'::citext AS t;
t
@@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f;
f
(1 row)
--- SELECT 'à'::citext ~<~ 'À'::citext AS f;
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
t
---
@@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t;
t
(1 row)
--- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
-- Test ~>~ and ~>=~
SELECT 'B'::citext ~>~ 'a'::citext AS t;
t
@@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t;
t
(1 row)
--- SELECT 'à'::citext ~>~ 'À'::citext AS f;
SELECT 'B'::citext ~>~ 'b'::citext AS f;
f
---
@@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t;
t
(1 row)
--- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
-- Test implicit casting. citext casts to text, but not vice-versa.
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.
t
diff --git a/contrib/citext/expected/citext_utf8.out b/contrib/citext/expected/citext_utf8.out
new file mode 100644
index 00000000000..666b07ccec4
--- /dev/null
+++ b/contrib/citext/expected/citext_utf8.out
@@ -0,0 +1,146 @@
+/*
+ * This test must be run in a database with UTF-8 encoding
+ * and a Unicode-aware locale.
+ */
+SELECT getdatabaseencoding() <> 'UTF8' OR
+ current_setting('lc_ctype') = 'C'
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+set client_encoding = utf8;
+-- CREATE EXTENSION IF NOT EXISTS citext;
+-- Multibyte sanity tests.
+SELECT 'À'::citext = 'À'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 'À'::citext = 'à'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 'À'::text = 'à'::text AS f; -- text wins.
+ f
+---
+ f
+(1 row)
+
+SELECT 'À'::citext <> 'B'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+-- Test combining characters making up canonically equivalent strings.
+SELECT 'Ä'::text <> 'Ä'::text AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 'Ä'::citext <> 'Ä'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+-- Test the Turkish dotted I. The lowercase is a single byte while the
+-- uppercase is multibyte. This is why the comparison code can't be optimized
+-- to compare string lengths.
+SELECT 'i'::citext = 'İ'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+-- Regression.
+SELECT 'láska'::citext <> 'laská'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t;
+ t
+---
+ t
+(1 row)
+
+-- Test ~<~ and ~<=~
+SELECT 'à'::citext ~<~ 'À'::citext AS f;
+ f
+---
+ f
+(1 row)
+
+SELECT 'à'::citext ~<=~ 'À'::citext AS t;
+ t
+---
+ t
+(1 row)
+
+-- Test ~>~ and ~>=~
+SELECT 'à'::citext ~>~ 'À'::citext AS f;
+ f
+---
+ f
+(1 row)
+
+SELECT 'à'::citext ~>=~ 'À'::citext AS t;
+ t
+---
+ t
+(1 row)
+
diff --git a/contrib/citext/expected/citext_utf8_1.out b/contrib/citext/expected/citext_utf8_1.out
new file mode 100644
index 00000000000..433e9853497
--- /dev/null
+++ b/contrib/citext/expected/citext_utf8_1.out
@@ -0,0 +1,9 @@
+/*
+ * This test must be run in a database with UTF-8 encoding
+ * and a Unicode-aware locale.
+ */
+SELECT getdatabaseencoding() <> 'UTF8' OR
+ current_setting('lc_ctype') = 'C'
+ AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/contrib/citext/sql/citext.sql b/contrib/citext/sql/citext.sql
index 55fb1d11a6f..8c87be6b1d2 100644
--- a/contrib/citext/sql/citext.sql
+++ b/contrib/citext/sql/citext.sql
@@ -19,34 +19,6 @@ SELECT 'a'::citext = 'b'::citext AS f;
SELECT 'a'::citext = 'ab'::citext AS f;
SELECT 'a'::citext <> 'ab'::citext AS t;
--- Multibyte sanity tests. Uncomment to run.
--- SELECT 'À'::citext = 'À'::citext AS t;
--- SELECT 'À'::citext = 'à'::citext AS t;
--- SELECT 'À'::text = 'à'::text AS f; -- text wins.
--- SELECT 'À'::citext <> 'B'::citext AS t;
-
--- Test combining characters making up canonically equivalent strings.
--- SELECT 'Ä'::text <> 'Ä'::text AS t;
--- SELECT 'Ä'::citext <> 'Ä'::citext AS t;
-
--- Test the Turkish dotted I. The lowercase is a single byte while the
--- uppercase is multibyte. This is why the comparison code can't be optimized
--- to compare string lengths.
--- SELECT 'i'::citext = 'İ'::citext AS t;
-
--- Regression.
--- SELECT 'láska'::citext <> 'laská'::citext AS t;
-
--- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
--- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero;
--- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive;
--- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative;
-
-- Test > and >=
SELECT 'B'::citext > 'a'::citext AS t;
SELECT 'b'::citext > 'A'::citext AS t;
@@ -811,24 +783,17 @@ SELECT citext_pattern_ge('b'::citext, 'a'::citext) AS true;
SELECT citext_pattern_ge('B'::citext, 'a'::citext) AS true;
SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true;
--- Multi-byte tests below are disabled like the sanity tests above.
--- Uncomment to run them.
-
-- Test ~<~ and ~<=~
SELECT 'a'::citext ~<~ 'B'::citext AS t;
SELECT 'b'::citext ~<~ 'A'::citext AS f;
--- SELECT 'à'::citext ~<~ 'À'::citext AS f;
SELECT 'a'::citext ~<=~ 'B'::citext AS t;
SELECT 'a'::citext ~<=~ 'A'::citext AS t;
--- SELECT 'à'::citext ~<=~ 'À'::citext AS t;
-- Test ~>~ and ~>=~
SELECT 'B'::citext ~>~ 'a'::citext AS t;
SELECT 'b'::citext ~>~ 'A'::citext AS t;
--- SELECT 'à'::citext ~>~ 'À'::citext AS f;
SELECT 'B'::citext ~>~ 'b'::citext AS f;
SELECT 'B'::citext ~>=~ 'b'::citext AS t;
--- SELECT 'à'::citext ~>=~ 'À'::citext AS t;
-- Test implicit casting. citext casts to text, but not vice-versa.
SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins.
diff --git a/contrib/citext/sql/citext_utf8.sql b/contrib/citext/sql/citext_utf8.sql
new file mode 100644
index 00000000000..d068000b423
--- /dev/null
+++ b/contrib/citext/sql/citext_utf8.sql
@@ -0,0 +1,51 @@
+/*
+ * This test must be run in a database with UTF-8 encoding
+ * and a Unicode-aware locale.
+ */
+
+SELECT getdatabaseencoding() <> 'UTF8' OR
+ current_setting('lc_ctype') = 'C'
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+set client_encoding = utf8;
+
+-- CREATE EXTENSION IF NOT EXISTS citext;
+
+-- Multibyte sanity tests.
+SELECT 'À'::citext = 'À'::citext AS t;
+SELECT 'À'::citext = 'à'::citext AS t;
+SELECT 'À'::text = 'à'::text AS f; -- text wins.
+SELECT 'À'::citext <> 'B'::citext AS t;
+
+-- Test combining characters making up canonically equivalent strings.
+SELECT 'Ä'::text <> 'Ä'::text AS t;
+SELECT 'Ä'::citext <> 'Ä'::citext AS t;
+
+-- Test the Turkish dotted I. The lowercase is a single byte while the
+-- uppercase is multibyte. This is why the comparison code can't be optimized
+-- to compare string lengths.
+SELECT 'i'::citext = 'İ'::citext AS t;
+
+-- Regression.
+SELECT 'láska'::citext <> 'laská'::citext AS t;
+
+SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t;
+SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t;
+SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t;
+SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t;
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t;
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t;
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t;
+SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t;
+SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t;
+
+-- Test ~<~ and ~<=~
+SELECT 'à'::citext ~<~ 'À'::citext AS f;
+SELECT 'à'::citext ~<=~ 'À'::citext AS t;
+
+-- Test ~>~ and ~>=~
+SELECT 'à'::citext ~>~ 'À'::citext AS f;
+SELECT 'à'::citext ~>=~ 'À'::citext AS t;