aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/tsearch/ts_locale.c27
1 files changed, 19 insertions, 8 deletions
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 85e1cd003ad..a369dad12be 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -23,18 +23,29 @@ static void tsearch_readline_callback(void *arg);
#ifdef USE_WIDE_UPPER_LOWER
+/*
+ * The reason these functions use a 3-wchar_t output buffer, not 2 as you
+ * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
+ * getting from char2wchar() is UTF16 not UTF32. A single input character
+ * may therefore produce a surrogate pair rather than just one wchar_t;
+ * we also need room for a trailing null. When we do get a surrogate pair,
+ * we pass just the first code to iswdigit() etc, so that these functions will
+ * always return false for characters outside the Basic Multilingual Plane.
+ */
+#define WC_BUF_LEN 3
+
int
t_isdigit(const char *ptr)
{
int clen = pg_mblen(ptr);
- wchar_t character[2];
+ wchar_t character[WC_BUF_LEN];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isdigit(TOUCHAR(ptr));
- char2wchar(character, 2, ptr, clen, mylocale);
+ char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswdigit((wint_t) character[0]);
}
@@ -43,14 +54,14 @@ int
t_isspace(const char *ptr)
{
int clen = pg_mblen(ptr);
- wchar_t character[2];
+ wchar_t character[WC_BUF_LEN];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isspace(TOUCHAR(ptr));
- char2wchar(character, 2, ptr, clen, mylocale);
+ char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswspace((wint_t) character[0]);
}
@@ -59,14 +70,14 @@ int
t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
- wchar_t character[2];
+ wchar_t character[WC_BUF_LEN];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isalpha(TOUCHAR(ptr));
- char2wchar(character, 2, ptr, clen, mylocale);
+ char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswalpha((wint_t) character[0]);
}
@@ -75,14 +86,14 @@ int
t_isprint(const char *ptr)
{
int clen = pg_mblen(ptr);
- wchar_t character[2];
+ wchar_t character[WC_BUF_LEN];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isprint(TOUCHAR(ptr));
- char2wchar(character, 2, ptr, clen, mylocale);
+ char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswprint((wint_t) character[0]);
}