diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-04-23 12:35:41 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-04-23 12:35:41 -0400 |
commit | 2ab0796d7a3a7116a79b65531fd33f1548514b52 (patch) | |
tree | b327fa2ba27bef4dbd2dd287e23b5bf6ba8ac2ee /src/backend/utils/mb/mbutils.c | |
parent | bb850306307d3d6ebb611c4039ae127236eb1699 (diff) | |
download | postgresql-2ab0796d7a3a7116a79b65531fd33f1548514b52.tar.gz postgresql-2ab0796d7a3a7116a79b65531fd33f1548514b52.zip |
Fix char2wchar/wchar2char to support collations properly.
These functions should take a pg_locale_t, not a collation OID, and should
call mbstowcs_l/wcstombs_l where available. Where those functions are not
available, temporarily select the correct locale with uselocale().
This change removes the bogus assumption that all locales selectable in
a given database have the same wide-character conversion method; in
particular, the collate.linux.utf8 regression test now passes with
LC_CTYPE=C, so long as the database encoding is UTF8.
I decided to move the char2wchar/wchar2char functions out of mbutils.c and
into pg_locale.c, because they work on wchar_t, not pg_wchar, and thus
don't really belong with the mbutils.c functions. Keeping them where they
were would have required importing pg_locale_t into pg_wchar.h somehow,
which did not seem like a good plan.
Diffstat (limited to 'src/backend/utils/mb/mbutils.c')
-rw-r--r-- | src/backend/utils/mb/mbutils.c | 121 |
1 file changed, 0 insertions, 121 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 3cb7ce3269d..848c26f41fb 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -13,7 +13,6 @@ #include "mb/pg_wchar.h" #include "utils/builtins.h" #include "utils/memutils.h" -#include "utils/pg_locale.h" #include "utils/syscache.h" /* @@ -689,126 +688,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ } - -#ifdef USE_WIDE_UPPER_LOWER - -/* - * wchar2char --- convert wide characters to multibyte format - * - * This has the same API as the standard wcstombs() function; in particular, - * tolen is the maximum number of bytes to store at *to, and *from must be - * zero-terminated. The output will be zero-terminated iff there is room. - */ -size_t -wchar2char(char *to, const wchar_t *from, size_t tolen, Oid collation) -{ - size_t result; - - if (tolen == 0) - return 0; - -#ifdef WIN32 - - /* - * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and - * for some reason mbstowcs and wcstombs won't do this for us, so we use - * MultiByteToWideChar(). - */ - if (GetDatabaseEncoding() == PG_UTF8) - { - result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, - NULL, NULL); - /* A zero return is failure */ - if (result <= 0) - result = -1; - else - { - Assert(result <= tolen); - /* Microsoft counts the zero terminator in the result */ - result--; - } - } - else -#endif /* WIN32 */ - { - Assert(!lc_ctype_is_c(collation)); - result = wcstombs(to, from, tolen); - } - return result; -} - -/* - * char2wchar --- convert multibyte characters to wide characters - * - * This has almost the API of mbstowcs(), except that *from need not be - * null-terminated; instead, the number of input bytes is specified as - * fromlen. Also, we ereport() rather than returning -1 for invalid - * input encoding. tolen is the maximum number of wchar_t's to store at *to. - * The output will be zero-terminated iff there is room. 
- */ -size_t -char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, Oid collation) -{ - size_t result; - - if (tolen == 0) - return 0; - -#ifdef WIN32 - /* See WIN32 "Unicode" comment above */ - if (GetDatabaseEncoding() == PG_UTF8) - { - /* Win32 API does not work for zero-length input */ - if (fromlen == 0) - result = 0; - else - { - result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); - /* A zero return is failure */ - if (result == 0) - result = -1; - } - - if (result != -1) - { - Assert(result < tolen); - /* Append trailing null wchar (MultiByteToWideChar() does not) */ - to[result] = 0; - } - } - else -#endif /* WIN32 */ - { - /* mbstowcs requires ending '\0' */ - char *str = pnstrdup(from, fromlen); - - Assert(!lc_ctype_is_c(collation)); - result = mbstowcs(to, str, tolen); - pfree(str); - } - - if (result == -1) - { - /* - * Invalid multibyte character encountered. We try to give a useful - * error message by letting pg_verifymbstr check the string. But it's - * possible that the string is OK to us, and not OK to mbstowcs --- - * this suggests that the LC_CTYPE locale is different from the - * database encoding. Give a generic error message if verifymbstr - * can't find anything wrong. - */ - pg_verifymbstr(from, fromlen, false); /* might not return */ - /* but if it does ... */ - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - - return result; -} -#endif - /* convert a multibyte string to a wchar */ int pg_mb2wchar(const char *from, pg_wchar *to) |