Fix char2wchar/wchar2char to support collations properly.

These functions should take a pg_locale_t, not a collation OID, and should call mbstowcs_l/wcstombs_l where available. Where those functions are not available, temporarily select the correct locale with uselocale(). This change removes the bogus assumption that all locales selectable in a given database have the same wide-character conversion method; in particular, the collate.linux.utf8 regression test now passes with LC_CTYPE=C, so long as the database encoding is UTF8. I decided to move the char2wchar/wchar2char functions out of mbutils.c and into pg_locale.c, because they work on wchar_t not pg_wchar and thus don't really belong with the mbutils.c functions. Keeping them where they were would have required importing pg_locale_t into pg_wchar.h somehow, which did not seem like a good plan.
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-04-23 12:35:41 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-04-23 12:35:41 -0400
commit: 2ab0796d7a3a7116a79b65531fd33f1548514b52 (patch)
tree: b327fa2ba27bef4dbd2dd287e23b5bf6ba8ac2ee /src/backend/utils/mb/mbutils.c
parent: bb850306307d3d6ebb611c4039ae127236eb1699 (diff)
download: postgresql-2ab0796d7a3a7116a79b65531fd33f1548514b52.tar.gz postgresql-2ab0796d7a3a7116a79b65531fd33f1548514b52.zip
1 files changed, 0 insertions, 121 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 3cb7ce3269d..848c26f41fb 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -13,7 +13,6 @@
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
-#include "utils/pg_locale.h"
 #include "utils/syscache.h"
 
 /*
@@ -689,126 +688,6 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
 }
 
 
-
-#ifdef USE_WIDE_UPPER_LOWER
-
-/*
- * wchar2char --- convert wide characters to multibyte format
- *
- * This has the same API as the standard wcstombs() function; in particular,
- * tolen is the maximum number of bytes to store at *to, and *from must be
- * zero-terminated.  The output will be zero-terminated iff there is room.
- */
-size_t
-wchar2char(char *to, const wchar_t *from, size_t tolen, Oid collation)
-{
-	size_t		result;
-
-	if (tolen == 0)
-		return 0;
-
-#ifdef WIN32
-
-	/*
-	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
-	 * for some reason mbstowcs and wcstombs won't do this for us, so we use
-	 * MultiByteToWideChar().
-	 */
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
-									 NULL, NULL);
-		/* A zero return is failure */
-		if (result <= 0)
-			result = -1;
-		else
-		{
-			Assert(result <= tolen);
-			/* Microsoft counts the zero terminator in the result */
-			result--;
-		}
-	}
-	else
-#endif   /* WIN32 */
-	{
-		Assert(!lc_ctype_is_c(collation));
-		result = wcstombs(to, from, tolen);
-	}
-	return result;
-}
-
-/*
- * char2wchar --- convert multibyte characters to wide characters
- *
- * This has almost the API of mbstowcs(), except that *from need not be
- * null-terminated; instead, the number of input bytes is specified as
- * fromlen.  Also, we ereport() rather than returning -1 for invalid
- * input encoding.	tolen is the maximum number of wchar_t's to store at *to.
- * The output will be zero-terminated iff there is room.
- */
-size_t
-char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, Oid collation)
-{
-	size_t		result;
-
-	if (tolen == 0)
-		return 0;
-
-#ifdef WIN32
-	/* See WIN32 "Unicode" comment above */
-	if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		/* Win32 API does not work for zero-length input */
-		if (fromlen == 0)
-			result = 0;
-		else
-		{
-			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
-			/* A zero return is failure */
-			if (result == 0)
-				result = -1;
-		}
-
-		if (result != -1)
-		{
-			Assert(result < tolen);
-			/* Append trailing null wchar (MultiByteToWideChar() does not) */
-			to[result] = 0;
-		}
-	}
-	else
-#endif   /* WIN32 */
-	{
-		/* mbstowcs requires ending '\0' */
-		char	   *str = pnstrdup(from, fromlen);
-
-		Assert(!lc_ctype_is_c(collation));
-		result = mbstowcs(to, str, tolen);
-		pfree(str);
-	}
-
-	if (result == -1)
-	{
-		/*
-		 * Invalid multibyte character encountered.  We try to give a useful
-		 * error message by letting pg_verifymbstr check the string.  But it's
-		 * possible that the string is OK to us, and not OK to mbstowcs ---
-		 * this suggests that the LC_CTYPE locale is different from the
-		 * database encoding.  Give a generic error message if verifymbstr
-		 * can't find anything wrong.
-		 */
-		pg_verifymbstr(from, fromlen, false);	/* might not return */
-		/* but if it does ... */
-		ereport(ERROR,
-				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-				 errmsg("invalid multibyte character for locale"),
-				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-	}
-
-	return result;
-}
-#endif
-
 /* convert a multibyte string to a wchar */
 int
 pg_mb2wchar(const char *from, pg_wchar *to)
author	Tom Lane <tgl@sss.pgh.pa.us>	2011-04-23 12:35:41 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2011-04-23 12:35:41 -0400
commit	2ab0796d7a3a7116a79b65531fd33f1548514b52 (patch)
tree	b327fa2ba27bef4dbd2dd287e23b5bf6ba8ac2ee /src/backend/utils/mb/mbutils.c
parent	bb850306307d3d6ebb611c4039ae127236eb1699 (diff)
download	postgresql-2ab0796d7a3a7116a79b65531fd33f1548514b52.tar.gz postgresql-2ab0796d7a3a7116a79b65531fd33f1548514b52.zip