diff options
author | Jeff Davis <jdavis@postgresql.org> | 2023-06-21 13:18:25 -0700 |
---|---|---|
committer | Jeff Davis <jdavis@postgresql.org> | 2023-06-21 13:18:25 -0700 |
commit | f3a01af29b1941ca7937a45504ab10ef5f99cdff (patch) | |
tree | 5211872fb7921674ddfeb0873689983fed80eaea /src | |
parent | 2535c74b1a6190cc42e13f6b6b55d94bff4b7dd6 (diff) | |
download | postgresql-f3a01af29b1941ca7937a45504ab10ef5f99cdff.tar.gz postgresql-f3a01af29b1941ca7937a45504ab10ef5f99cdff.zip |
ICU: do not convert locale 'C' to 'en-US-u-va-posix'.
Older versions of ICU canonicalize "C" to "en-US-u-va-posix", but
starting in ICU version 64, the "C" locale is considered
obsolete. Postgres commit ea1db8ae70 introduced code to always
canonicalize "C" to "en-US-u-va-posix" for consistency and
convenience, but that special-casing was deemed too confusing.
This commit removes that code, so that "C" is treated like other ICU
locale names: canonicalization is attempted, and if it fails, the
behavior is controlled by icu_validation_level.
A similar change was previously committed as f7faa9976c, then reverted
due to an ICU-version-dependent test failure. This commit un-reverts
it, omitting the test because we now expect the behavior to depend on
the version of ICU being used.
Discussion: https://postgr.es/m/3a200aca-4672-4b37-fc91-5d198a323503%40eisentraut.org
Discussion: https://postgr.es/m/f83f089ee1e9acd5dbbbf3353294d24e1f196e95.camel@j-davis.com
Discussion: https://postgr.es/m/37520ec1ae9591f83132f82dbd625f3fc2d69c16.camel@j-davis.com
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 19 |
-rw-r--r-- | src/bin/initdb/initdb.c | 17 |
-rw-r--r-- | src/test/regress/expected/collate.icu.utf8.out | 2 |
-rw-r--r-- | src/test/regress/sql/collate.icu.utf8.sql | 2 |
4 files changed, 6 insertions, 34 deletions
```diff
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 31e3b16ae00..c8b36f3af2f 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2784,26 +2784,10 @@ icu_language_tag(const char *loc_str, int elevel)
 {
 #ifdef USE_ICU
 	UErrorCode status;
-	char lang[ULOC_LANG_CAPACITY];
 	char *langtag;
 	size_t buflen = 32; /* arbitrary starting buffer size */
 	const bool strict = true;
 
-	status = U_ZERO_ERROR;
-	uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
-	if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
-	{
-		if (elevel > 0)
-			ereport(elevel,
-					(errmsg("could not get language from locale \"%s\": %s",
-							loc_str, u_errorName(status))));
-		return NULL;
-	}
-
-	/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
-	if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
-		return pstrdup("en-US-u-va-posix");
-
 	/*
 	 * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
 	 * RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2884,8 +2868,7 @@ icu_validate_locale(const char *loc_str)
 
 	/* check for special language name */
 	if (strcmp(lang, "") == 0 ||
-		strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
-		strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+		strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
 		found = true;
 
 	/* search for matching language within ICU */
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index fa3af0d75c5..fc1fb363e74 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2244,24 +2244,10 @@ icu_language_tag(const char *loc_str)
 {
 #ifdef USE_ICU
 	UErrorCode status;
-	char lang[ULOC_LANG_CAPACITY];
 	char *langtag;
 	size_t buflen = 32; /* arbitrary starting buffer size */
 	const bool strict = true;
 
-	status = U_ZERO_ERROR;
-	uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
-	if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
-	{
-		pg_fatal("could not get language from locale \"%s\": %s",
-				 loc_str, u_errorName(status));
-		return NULL;
-	}
-
-	/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
-	if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
-		return pstrdup("en-US-u-va-posix");
-
 	/*
 	 * A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
 	 * RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2326,8 +2312,7 @@ icu_validate_locale(const char *loc_str)
 
 	/* check for special language name */
 	if (strcmp(lang, "") == 0 ||
-		strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
-		strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+		strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
 		found = true;
 
 	/* search for matching language within ICU */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index b7fbee447f1..78a9cb38fa3 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1020,6 +1020,7 @@ CREATE ROLE regress_test_role;
 CREATE SCHEMA test_schema;
 -- We need to do this this way to cope with varying names for encodings:
 SET client_min_messages TO WARNING;
+SET icu_validation_level = disabled;
 do $$
 BEGIN
   EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1034,6 +1035,7 @@ BEGIN
           quote_literal((SELECT CASE WHEN datlocprovider='i' THEN daticulocale ELSE datcollate END FROM pg_database WHERE datname = current_database())) || ');';
 END
 $$;
+RESET icu_validation_level;
 RESET client_min_messages;
 CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
 ERROR: parameter "locale" must be specified
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 079d7ae39de..3db9e259138 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -358,6 +358,7 @@ CREATE SCHEMA test_schema;
 
 -- We need to do this this way to cope with varying names for encodings:
 SET client_min_messages TO WARNING;
+SET icu_validation_level = disabled;
 
 do $$
 BEGIN
@@ -373,6 +374,7 @@ BEGIN
 END
 $$;
 
+RESET icu_validation_level;
 RESET client_min_messages;
 
 CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
```