aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Davis <jdavis@postgresql.org>2023-03-23 09:50:06 -0700
committerJeff Davis <jdavis@postgresql.org>2023-03-23 10:08:27 -0700
commit3b50275b12950280fb07193e24a4f400ed8a9fef (patch)
tree7ac5d04feffb1588de3d13c4a6d9a4d045232709
parent949e2e7c4f680ef86e93500ba4bee563ae4ce78e (diff)
downloadpostgresql-3b50275b12950280fb07193e24a4f400ed8a9fef.tar.gz
postgresql-3b50275b12950280fb07193e24a4f400ed8a9fef.zip
Handle the "und" locale in ICU versions 54 and older.
The "und" locale is an alternative spelling of the root locale, but it was not recognized until ICU 55. To maintain common behavior across all supported ICU versions, check for "und" and replace with "root" before opening. Previously, the lack of support for "und" was dangerous, because versions 54 and older fall back to the environment when a locale is not found. If the user specified "und" for the language (which is expected and documented), it could not only resolve to the wrong collator, but it could unexpectedly change (which could lead to corrupt indexes). This effectively reverts commit d72900bded, which worked around the problem for the built-in "unicode" collation, and is no longer necessary. Discussion: https://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com Discussion: https://postgr.es/m/0c6fa66f2753217d2a40480a96bd2ccf023536a1.camel@j-davis.com Reviewed-by: Peter Eisentraut
-rw-r--r--src/backend/utils/adt/pg_locale.c34
-rw-r--r--src/bin/initdb/initdb.c2
-rw-r--r--src/test/regress/expected/collate.icu.utf8.out7
-rw-r--r--src/test/regress/sql/collate.icu.utf8.sql2
4 files changed, 44 insertions, 1 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index dd0786dff52..42b6ad45cb8 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2503,6 +2503,7 @@ pg_ucol_open(const char *loc_str)
{
UCollator *collator;
UErrorCode status;
+ char *fixed_str = NULL;
/*
* Must never open default collator, because it depends on the environment
@@ -2517,6 +2518,36 @@ pg_ucol_open(const char *loc_str)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("opening default collator is not supported")));
+ /*
+ * In ICU versions 54 and earlier, "und" is not a recognized spelling of
+ * the root locale. If the first component of the locale is "und", replace
+ * with "root" before opening.
+ */
+ if (U_ICU_VERSION_MAJOR_NUM < 55)
+ {
+ char lang[ULOC_LANG_CAPACITY];
+
+ status = U_ZERO_ERROR;
+ uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+ if (U_FAILURE(status))
+ {
+ ereport(ERROR,
+ (errmsg("could not get language from locale \"%s\": %s",
+ loc_str, u_errorName(status))));
+ }
+
+ if (strcmp(lang, "und") == 0)
+ {
+ const char *remainder = loc_str + strlen("und");
+
+ fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
+ strcpy(fixed_str, "root");
+ strcat(fixed_str, remainder);
+
+ loc_str = fixed_str;
+ }
+ }
+
status = U_ZERO_ERROR;
collator = ucol_open(loc_str, &status);
if (U_FAILURE(status))
@@ -2527,6 +2558,9 @@ pg_ucol_open(const char *loc_str)
if (U_ICU_VERSION_MAJOR_NUM < 54)
icu_set_collation_attributes(collator, loc_str);
+ if (fixed_str != NULL)
+ pfree(fixed_str);
+
return collator;
}
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3c0bf5c0a89..bae97539fc8 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1701,7 +1701,7 @@ setup_collation(FILE *cmdfd)
* that they win if libc defines a locale with the same name.
*/
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
- "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, '');\n\n",
+ "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 6225b575cea..f135200c997 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1312,6 +1312,13 @@ SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
t
(1 row)
+CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
+SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
+ ?column?
+----------
+ t
+(1 row)
+
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);
CREATE TABLE test3cs (x text COLLATE case_sensitive);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 64cbfd0a5bd..8105ebc8ae1 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -521,6 +521,8 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
+SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);