aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/pg_locale.c
diff options
context:
space:
mode:
author: Thomas Munro <tmunro@postgresql.org> 2024-10-05 13:48:33 +1300
committer: Thomas Munro <tmunro@postgresql.org> 2024-10-05 13:50:02 +1300
commitadbb27ac89e07cfbd94ea07a53930addfc3895ee (patch)
tree923f37af56335564b72ec8bdec77943ab8af267b /src/backend/utils/adt/pg_locale.c
parentf22e84df1dea96c8f4b0f7369ea60607fbb9ce10 (diff)
downloadpostgresql-adbb27ac89e07cfbd94ea07a53930addfc3895ee.tar.gz
postgresql-adbb27ac89e07cfbd94ea07a53930addfc3895ee.zip
Reject non-ASCII locale names.
Commit bf03cfd1 started scanning all available BCP 47 locale names on Windows. This caused an abort/crash in the Windows runtime library if the default locale name contained non-ASCII characters, because of our use of the setlocale() save/restore pattern with "char" strings. After switching to another locale with a different encoding, the saved name could no longer be understood, and setlocale() would abort. "Turkish_Türkiye.1254" is the example from recent reports, but there are other examples of countries and languages with non-ASCII characters in their names, and they appear in Windows' (old style) locale names. To defend against this: 1. In initdb, reject non-ASCII locale names given explicitly on the command line, or returned by the operating system environment with setlocale(..., ""), or "canonicalized" by the operating system when we set it. 2. In initdb only, perform the save-and-restore with Windows' non-standard wchar_t variant of setlocale(), so that it is not subject to round trip failures stemming from char string encoding confusion. 3. In the backend, we don't have to worry about the save-and-restore problem because we have already vetted the defaults, so we just have to make sure that CREATE DATABASE also rejects non-ASCII names in any new databases. SET lc_XXX doesn't suffer from the problem, but the ban applies to it too because it uses check_locale(). CREATE COLLATION doesn't suffer from the problem either, but it doesn't use check_locale() so it is not included in the new ban for now, to minimize the change. Anyone who encounters the new error message should either create a new duplicated locale with an ASCII-only name using Windows Locale Builder, or consider using BCP 47 names like "tr-TR". Users already couldn't initialize a cluster with "Turkish_Türkiye.1254" on PostgreSQL 16+, but the new failure mode is an error message that explains why, instead of a crash. Back-patch to 16, where bf03cfd1 landed.
Older versions are affected in theory too, but only 16 and later are causing crash reports. Reviewed-by: Andrew Dunstan <andrew@dunslane.net> (the idea, not the patch) Reported-by: Haifang Wang (Centific Technologies Inc) <v-haiwang@microsoft.com> Discussion: https://postgr.es/m/PH8PR21MB3902F334A3174C54058F792CE5182%40PH8PR21MB3902.namprd21.prod.outlook.com
Diffstat (limited to 'src/backend/utils/adt/pg_locale.c')
-rw-r--r--src/backend/utils/adt/pg_locale.c23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index f2a28d5ef5a..b4954959f98 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -58,6 +58,7 @@
#include "catalog/pg_collation.h"
#include "catalog/pg_database.h"
#include "common/hashfn.h"
+#include "common/string.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
@@ -341,6 +342,16 @@ check_locale(int category, const char *locale, char **canonname)
char *save;
char *res;
+ /* Don't let Windows' non-ASCII locale names in. */
+ if (!pg_is_ascii(locale))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("locale name \"%s\" contains non-ASCII characters",
+ locale)));
+ return false;
+ }
+
if (canonname)
*canonname = NULL; /* in case of failure */
@@ -363,6 +374,18 @@ check_locale(int category, const char *locale, char **canonname)
elog(WARNING, "failed to restore old locale \"%s\"", save);
pfree(save);
+ /* Don't let Windows' non-ASCII locale names out. */
+ if (canonname && *canonname && !pg_is_ascii(*canonname))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("locale name \"%s\" contains non-ASCII characters",
+ *canonname)));
+ pfree(*canonname);
+ *canonname = NULL;
+ return false;
+ }
+
return (res != NULL);
}