diff options
author | Peter Eisentraut <peter@eisentraut.org> | 2023-01-03 14:21:40 +0100 |
---|---|---|
committer | Peter Eisentraut <peter@eisentraut.org> | 2023-01-03 14:21:56 +0100 |
commit | bf03cfd162176d543da79f9398131abc251ddbb9 (patch) | |
tree | c8b43b2ac1bd10de8c332d4ea726d03e555ef5a8 /src/backend/commands/collationcmds.c | |
parent | 33ab0a2a527e3af5beee3a98fc07201e555d6e45 (diff) | |
download | postgresql-bf03cfd162176d543da79f9398131abc251ddbb9.tar.gz postgresql-bf03cfd162176d543da79f9398131abc251ddbb9.zip |
Windows support in pg_import_system_collations
Windows can enumerate the locales that are either installed or
supported by calling EnumSystemLocalesEx(), similar to what is already
done in the READ_LOCALE_A_OUTPUT switch. We can refactor some of the
logic already used in that switch into a new function
create_collation_from_locale().
The enumerated locales have BCP 47 shape, that is with a hyphen
between language and territory, instead of POSIX's underscore. The
created collations will retain the BCP 47 shape, but we will also
create a POSIX alias, so xx-YY will have an xx_YY alias.
A new test collate.windows.win1252 is added that is like
collate.linux.utf8.
Author: Juan Jose Santamaria Flecha <juanjo.santamaria@gmail.com>
Reviewed-by: Dmitry Koval <d.koval@postgrespro.ru>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Discussion: https://www.postgresql.org/message-id/flat/0050ec23-34d9-2765-9015-98c04f0e18ac@postgrespro.ru
Diffstat (limited to 'src/backend/commands/collationcmds.c')
-rw-r--r-- | src/backend/commands/collationcmds.c | 237 |
1 files changed, 187 insertions, 50 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 519986a402f..6a4311cc631 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -499,6 +499,12 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) #define READ_LOCALE_A_OUTPUT #endif +/* will we use EnumSystemLocalesEx in pg_import_system_collations? */ +#ifdef WIN32 +#define ENUM_SYSTEM_LOCALE +#endif + + #ifdef READ_LOCALE_A_OUTPUT /* * "Normalize" a libc locale name, stripping off encoding tags such as @@ -611,6 +617,161 @@ get_icu_locale_comment(const char *localename) /* + * Create a new collation using the input locale 'locale'. (subroutine for + * pg_import_system_collations()) + * + * 'nspid' is the namespace id where the collation will be created. + * + * 'nvalidp' is incremented if the locale has a valid encoding. + * + * 'ncreatedp' is incremented if the collation is actually created. If the + * collation already exists it will quietly do nothing. + * + * The returned value is the encoding of the locale, -1 if the locale is not + * valid for creating a collation. + * + */ +pg_attribute_unused() +static int +create_collation_from_locale(const char *locale, int nspid, + int *nvalidp, int *ncreatedp) +{ + int enc; + Oid collid; + + /* + * Some systems have locale names that don't consist entirely of + * ASCII letters (such as "bokmål" or "français"). + * This is pretty silly, since we need the locale itself to + * interpret the non-ASCII characters. We can't do much with + * those, so we filter them out. 
+ */ + if (!pg_is_ascii(locale)) + { + elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale); + return -1; + } + + enc = pg_get_encoding_from_locale(locale, false); + if (enc < 0) + { + elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale); + return -1; + } + if (!PG_VALID_BE_ENCODING(enc)) + { + elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale); + return -1; + } + if (enc == PG_SQL_ASCII) + return -1; /* C/POSIX are already in the catalog */ + + /* count valid locales found in operating system */ + (*nvalidp)++; + + /* + * Create a collation named the same as the locale, but quietly + * doing nothing if it already exists. This is the behavior we + * need even at initdb time, because some versions of "locale -a" + * can report the same locale name more than once. And it's + * convenient for later import runs, too, since you just about + * always want to add on new locales without a lot of chatter + * about existing ones. + */ + collid = CollationCreate(locale, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + locale, locale, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + (*ncreatedp)++; + + /* Must do CCI between inserts to handle duplicates correctly */ + CommandCounterIncrement(); + } + + return enc; +} + + +#ifdef ENUM_SYSTEM_LOCALE +/* parameter to be passed to the callback function win32_read_locale() */ +typedef struct +{ + Oid nspid; + int *ncreatedp; + int *nvalidp; +} CollParam; + +/* + * Callback function for EnumSystemLocalesEx() in + * pg_import_system_collations(). Creates a collation for every valid locale + * and a POSIX alias collation. + * + * The callback contract is to return TRUE to continue enumerating and FALSE + * to stop enumerating. We always want to continue. 
+ */ +static BOOL CALLBACK +win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam) +{ + CollParam *param = (CollParam *) lparam; + char localebuf[NAMEDATALEN]; + int result; + int enc; + + (void) dwFlags; + + result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN, + NULL, NULL); + + if (result == 0) + { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf); + return TRUE; + } + if (localebuf[0] == '\0') + return TRUE; + + enc = create_collation_from_locale(localebuf, param->nspid, + param->nvalidp, param->ncreatedp); + if (enc < 0) + return TRUE; + + /* + * Windows will use hyphens between language and territory, where POSIX + * uses an underscore. Simply create a POSIX alias. + */ + if (strchr(localebuf, '-')) + { + char alias[NAMEDATALEN]; + Oid collid; + + strcpy(alias, localebuf); + for (char *p = alias; *p; p++) + if (*p == '-') + *p = '_'; + + collid = CollationCreate(alias, param->nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + localebuf, localebuf, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + true, true); + if (OidIsValid(collid)) + { + (*param->ncreatedp)++; + + CommandCounterIncrement(); + } + } + + return TRUE; +} +#endif /* ENUM_SYSTEM_LOCALE */ + + +/* * pg_import_system_collations: add known system collations to pg_collation */ Datum @@ -668,58 +829,9 @@ pg_import_system_collations(PG_FUNCTION_ARGS) } localebuf[len - 1] = '\0'; - /* - * Some systems have locale names that don't consist entirely of - * ASCII letters (such as "bokmål" or "français"). - * This is pretty silly, since we need the locale itself to - * interpret the non-ASCII characters. We can't do much with - * those, so we filter them out. 
- */ - if (!pg_is_ascii(localebuf)) - { - elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf); - continue; - } - - enc = pg_get_encoding_from_locale(localebuf, false); + enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated); if (enc < 0) - { - elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", - localebuf); - continue; - } - if (!PG_VALID_BE_ENCODING(enc)) - { - elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf); continue; - } - if (enc == PG_SQL_ASCII) - continue; /* C/POSIX are already in the catalog */ - - /* count valid locales found in operating system */ - nvalid++; - - /* - * Create a collation named the same as the locale, but quietly - * doing nothing if it already exists. This is the behavior we - * need even at initdb time, because some versions of "locale -a" - * can report the same locale name more than once. And it's - * convenient for later import runs, too, since you just about - * always want to add on new locales without a lot of chatter - * about existing ones. - */ - collid = CollationCreate(localebuf, nspid, GetUserId(), - COLLPROVIDER_LIBC, true, enc, - localebuf, localebuf, NULL, - get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), - true, true); - if (OidIsValid(collid)) - { - ncreated++; - - /* Must do CCI between inserts to handle duplicates correctly */ - CommandCounterIncrement(); - } /* * Generate aliases such as "en_US" in addition to "en_US.utf8" @@ -857,5 +969,30 @@ pg_import_system_collations(PG_FUNCTION_ARGS) } #endif /* USE_ICU */ + /* Load collations known to WIN32 */ +#ifdef ENUM_SYSTEM_LOCALE + { + int nvalid = 0; + CollParam param; + + param.nspid = nspid; + param.ncreatedp = &ncreated; + param.nvalidp = &nvalid; + + /* + * Enumerate the locales that are either installed on or supported + * by the OS. 
+ */ + if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL, + (LPARAM) &param, NULL)) + _dosmaperr(GetLastError()); + + /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); + } +#endif /* ENUM_SYSTEM_LOCALE */ + PG_RETURN_INT32(ncreated); } |