aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/commands/collationcmds.c8
-rw-r--r--src/backend/utils/adt/pg_locale.c41
-rw-r--r--src/test/regress/expected/collate.icu.utf8.out8
-rw-r--r--src/test/regress/sql/collate.icu.utf8.sql4
4 files changed, 50 insertions, 11 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8949684afe1..3d0aea05685 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
const char *name;
char *langtag;
char *icucomment;
- const char *iculocstr;
Oid collid;
if (i == -1)
@@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
name = uloc_getAvailable(i);
langtag = get_icu_language_tag(name);
- iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
/*
* Be paranoid about not allowing any non-ASCII strings into
* pg_collation
*/
- if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
+ if (!pg_is_ascii(langtag))
continue;
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(),
COLLPROVIDER_ICU, true, -1,
- NULL, NULL, iculocstr, NULL,
- get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
+ NULL, NULL, langtag, NULL,
+ get_collation_actual_version(COLLPROVIDER_ICU, langtag),
true, true);
if (OidIsValid(collid))
{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 90ec773c024..c3ede994be5 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2634,9 +2634,12 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
}
/*
- * Parse collation attributes and apply them to the open collator. This takes
- * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
- * applies the key-value arguments.
+ * Parse collation attributes from the given locale string and apply them to
+ * the open collator.
+ *
+ * First, the locale string is canonicalized to an ICU format locale ID such
+ * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
+ * the key-value arguments.
*
* Starting with ICU version 54, the attributes are processed automatically by
* ucol_open(), so this is only necessary for emulating this behavior on older
@@ -2646,9 +2649,34 @@ pg_attribute_unused()
static void
icu_set_collation_attributes(UCollator *collator, const char *loc)
{
- char *str = asc_tolower(loc, strlen(loc));
+ UErrorCode status;
+ int32_t len;
+ char *icu_locale_id;
+ char *lower_str;
+ char *str;
+
+ /*
+ * The input locale may be a BCP 47 language tag, e.g.
+ * "und-u-kc-ks-level1", which expresses the same attributes in a
+ * different form. It will be converted to the equivalent ICU format
+ * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
+ * uloc_canonicalize().
+ */
+ status = U_ZERO_ERROR;
+ len = uloc_canonicalize(loc, NULL, 0, &status);
+ icu_locale_id = palloc(len + 1);
+ status = U_ZERO_ERROR;
+ len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("canonicalization failed for locale string \"%s\": %s",
+ loc, u_errorName(status))));
- str = strchr(str, '@');
+ lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
+
+ pfree(icu_locale_id);
+
+ str = strchr(lower_str, '@');
if (!str)
return;
str++;
@@ -2663,7 +2691,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
char *value;
UColAttribute uattr;
UColAttributeValue uvalue;
- UErrorCode status;
status = U_ZERO_ERROR;
@@ -2730,6 +2757,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
loc, u_errorName(status))));
}
}
+
+ pfree(lower_str);
}
#endif /* USE_ICU */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 9a3e12e42df..6225b575cea 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+ ?column?
+----------
+ t
+(1 row)
+
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);
CREATE TABLE test3cs (x text COLLATE case_sensitive);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 0790068f31a..64cbfd0a5bd 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);
CREATE TABLE test3cs (x text COLLATE case_sensitive);