aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/collationcmds.c
diff options
context:
space:
mode:
authorPeter Eisentraut <peter@eisentraut.org>2023-01-03 14:21:40 +0100
committerPeter Eisentraut <peter@eisentraut.org>2023-01-03 14:21:56 +0100
commitbf03cfd162176d543da79f9398131abc251ddbb9 (patch)
treec8b43b2ac1bd10de8c332d4ea726d03e555ef5a8 /src/backend/commands/collationcmds.c
parent33ab0a2a527e3af5beee3a98fc07201e555d6e45 (diff)
downloadpostgresql-bf03cfd162176d543da79f9398131abc251ddbb9.tar.gz
postgresql-bf03cfd162176d543da79f9398131abc251ddbb9.zip
Windows support in pg_import_system_collations
Windows can enumerate the locales that are either installed or supported by calling EnumSystemLocalesEx(), similar to what is already done in the READ_LOCALE_A_OUTPUT switch. We can refactor some of the logic already used in that switch into a new function create_collation_from_locale(). The enumerated locales have BCP 47 shape, that is with a hyphen between language and territory, instead of POSIX's underscore. The created collations will retain the BCP 47 shape, but we will also create a POSIX alias, so xx-YY will have an xx_YY alias. A new test collate.windows.win1252 is added that is like collate.linux.utf8. Author: Juan Jose Santamaria Flecha <juanjo.santamaria@gmail.com> Reviewed-by: Dmitry Koval <d.koval@postgrespro.ru> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Discussion: https://www.postgresql.org/message-id/flat/0050ec23-34d9-2765-9015-98c04f0e18ac@postgrespro.ru
Diffstat (limited to 'src/backend/commands/collationcmds.c')
-rw-r--r--src/backend/commands/collationcmds.c237
1 files changed, 187 insertions, 50 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 519986a402f..6a4311cc631 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -499,6 +499,12 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
#define READ_LOCALE_A_OUTPUT
#endif
+/*
+ * Will we use EnumSystemLocalesEx in pg_import_system_collations?
+ *
+ * When this symbol is defined, the win32_read_locale() callback and the
+ * code that drives EnumSystemLocalesEx() are compiled in.
+ */
+#ifdef WIN32
+#define ENUM_SYSTEM_LOCALE
+#endif
+
+
#ifdef READ_LOCALE_A_OUTPUT
/*
* "Normalize" a libc locale name, stripping off encoding tags such as
@@ -611,6 +617,161 @@ get_icu_locale_comment(const char *localename)
/*
+ * Create a new collation using the input locale 'locale'. (subroutine for
+ * pg_import_system_collations())
+ *
+ * 'nspid' is the namespace id where the collation will be created.
+ *
+ * 'nvalidp' is incremented if the locale has a valid encoding.
+ *
+ * 'ncreatedp' is incremented if the collation is actually created. If the
+ * collation already exists it will quietly do nothing.
+ *
+ * The returned value is the encoding of the locale, -1 if the locale is not
+ * valid for creating a collation.
+ *
+ */
+pg_attribute_unused()
+static int
+create_collation_from_locale(const char *locale, int nspid,
+							 int *nvalidp, int *ncreatedp)
+{
+	int			encoding;
+	Oid			newoid;
+
+	/*
+	 * Locale names containing non-ASCII characters (for instance
+	 * "bokm&aring;l" or "fran&ccedil;ais") cannot be interpreted without
+	 * already knowing the locale they name, so there is nothing useful we
+	 * can do with them.  Skip such names.
+	 */
+	if (!pg_is_ascii(locale))
+	{
+		elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
+		return -1;
+	}
+
+	/* Work out the encoding and reject anything we cannot use */
+	encoding = pg_get_encoding_from_locale(locale, false);
+	if (encoding < 0)
+	{
+		elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
+		return -1;
+	}
+	else if (!PG_VALID_BE_ENCODING(encoding))
+	{
+		elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
+		return -1;
+	}
+	else if (encoding == PG_SQL_ASCII)
+	{
+		/* C/POSIX are already in the catalog */
+		return -1;
+	}
+
+	/* The operating system gave us one more usable locale */
+	*nvalidp += 1;
+
+	/*
+	 * The collation gets the same name as the locale.  An already-existing
+	 * collation of that name is silently left alone: "locale -a" may list a
+	 * name more than once (which matters even during initdb), and repeated
+	 * import runs should just add whatever is new without complaining
+	 * about what is already there.
+	 */
+	newoid = CollationCreate(locale, nspid, GetUserId(),
+							 COLLPROVIDER_LIBC, true, encoding,
+							 locale, locale, NULL,
+							 get_collation_actual_version(COLLPROVIDER_LIBC, locale),
+							 true, true);
+
+	/* Nothing was inserted, so there is nothing more to account for */
+	if (!OidIsValid(newoid))
+		return encoding;
+
+	*ncreatedp += 1;
+
+	/* A CCI is required between inserts so duplicates are detected */
+	CommandCounterIncrement();
+
+	return encoding;
+}
+
+
+#ifdef ENUM_SYSTEM_LOCALE
+/*
+ * Parameter to be passed to the callback function win32_read_locale().
+ *
+ * The two counters are pointers so that the callback can update variables
+ * owned by its caller.
+ */
+typedef struct
+{
+	Oid			nspid;			/* namespace in which collations are created */
+	int		   *ncreatedp;		/* incremented for each collation created */
+	int		   *nvalidp;		/* incremented for each locale with a valid
+								 * encoding */
+} CollParam;
+
+/*
+ * Callback function for EnumSystemLocalesEx() in
+ * pg_import_system_collations().  Creates a collation for every valid locale
+ * and a POSIX alias collation.
+ *
+ * 'pStr' is the wide-character locale name supplied by the enumeration,
+ * 'dwFlags' is ignored, and 'lparam' carries a pointer to a CollParam with
+ * the target namespace and the counters to update.
+ *
+ * Locales whose name cannot be converted into a NAMEDATALEN-sized buffer,
+ * or that create_collation_from_locale() rejects, are skipped.
+ *
+ * The callback contract is to return TRUE to continue enumerating and FALSE
+ * to stop enumerating.  We always want to continue.
+ */
+static BOOL CALLBACK
+win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
+{
+	CollParam  *param = (CollParam *) lparam;
+	char		localebuf[NAMEDATALEN];
+	int			result;
+	int			enc;
+
+	(void) dwFlags;
+
+	/*
+	 * Clear the buffer up front.  If the conversion below fails, the buffer
+	 * contents are not something we can rely on, and we still want to be
+	 * able to print it safely.
+	 */
+	memset(localebuf, 0, sizeof(localebuf));
+
+	result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
+								 NULL, NULL);
+
+	if (result == 0)
+	{
+		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+		{
+			/*
+			 * The name did not fit, so whatever was written need not be
+			 * NUL-terminated.  Terminate it before handing it to elog().
+			 */
+			localebuf[NAMEDATALEN - 1] = '\0';
+			elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
+		}
+		return TRUE;
+	}
+	if (localebuf[0] == '\0')
+		return TRUE;
+
+	enc = create_collation_from_locale(localebuf, param->nspid,
+									   param->nvalidp, param->ncreatedp);
+	if (enc < 0)
+		return TRUE;
+
+	/*
+	 * Windows will use hyphens between language and territory, where POSIX
+	 * uses an underscore. Simply create a POSIX alias.
+	 */
+	if (strchr(localebuf, '-'))
+	{
+		char		alias[NAMEDATALEN];
+		Oid			collid;
+
+		/*
+		 * localebuf holds a NUL-terminated string of at most NAMEDATALEN
+		 * bytes here, so it fits in alias.
+		 */
+		strcpy(alias, localebuf);
+		for (char *p = alias; *p; p++)
+			if (*p == '-')
+				*p = '_';
+
+		/* The alias keeps the original (hyphenated) name as its locale */
+		collid = CollationCreate(alias, param->nspid, GetUserId(),
+								 COLLPROVIDER_LIBC, true, enc,
+								 localebuf, localebuf, NULL,
+								 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
+								 true, true);
+		if (OidIsValid(collid))
+		{
+			(*param->ncreatedp)++;
+
+			/* Must do CCI between inserts to handle duplicates correctly */
+			CommandCounterIncrement();
+		}
+	}
+
+	return TRUE;
+}
+#endif /* ENUM_SYSTEM_LOCALE */
+
+
+/*
* pg_import_system_collations: add known system collations to pg_collation
*/
Datum
@@ -668,58 +829,9 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
}
localebuf[len - 1] = '\0';
- /*
- * Some systems have locale names that don't consist entirely of
- * ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
- * This is pretty silly, since we need the locale itself to
- * interpret the non-ASCII characters. We can't do much with
- * those, so we filter them out.
- */
- if (!pg_is_ascii(localebuf))
- {
- elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf);
- continue;
- }
-
- enc = pg_get_encoding_from_locale(localebuf, false);
+ enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
if (enc < 0)
- {
- elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"",
- localebuf);
- continue;
- }
- if (!PG_VALID_BE_ENCODING(enc))
- {
- elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf);
continue;
- }
- if (enc == PG_SQL_ASCII)
- continue; /* C/POSIX are already in the catalog */
-
- /* count valid locales found in operating system */
- nvalid++;
-
- /*
- * Create a collation named the same as the locale, but quietly
- * doing nothing if it already exists. This is the behavior we
- * need even at initdb time, because some versions of "locale -a"
- * can report the same locale name more than once. And it's
- * convenient for later import runs, too, since you just about
- * always want to add on new locales without a lot of chatter
- * about existing ones.
- */
- collid = CollationCreate(localebuf, nspid, GetUserId(),
- COLLPROVIDER_LIBC, true, enc,
- localebuf, localebuf, NULL,
- get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
- true, true);
- if (OidIsValid(collid))
- {
- ncreated++;
-
- /* Must do CCI between inserts to handle duplicates correctly */
- CommandCounterIncrement();
- }
/*
* Generate aliases such as "en_US" in addition to "en_US.utf8"
@@ -857,5 +969,30 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
}
#endif /* USE_ICU */
+ /* Load collations known to WIN32 */
+#ifdef ENUM_SYSTEM_LOCALE
+ {
+ int nvalid = 0;
+ CollParam param;
+
+ param.nspid = nspid;
+ param.ncreatedp = &ncreated;
+ param.nvalidp = &nvalid;
+
+ /*
+ * Enumerate the locales that are either installed on or supported
+ * by the OS.
+ */
+ if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
+ (LPARAM) &param, NULL))
+ _dosmaperr(GetLastError());
+
+ /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
+ if (nvalid == 0)
+ ereport(WARNING,
+ (errmsg("no usable system locales were found")));
+ }
+#endif /* ENUM_SYSTEM_LOCALE */
+
PG_RETURN_INT32(ncreated);
}