Fix mapping of PostgreSQL encodings to Python encodings.

Windows encodings, "win1252" and so forth, are named differently in Python, like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails for some reason, use a plain ereport(), not a PLy_elog(), to report that error. That avoids recursion and a crash if PLy_elog() tries to call PLyUnicode_Bytes() again. This fixes a bug reported by Asif Naeem. Backpatch down to 9.0; before that, plpython didn't even try these conversions. Jan Urbański, with minor comment improvements by me.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2012-07-05 21:45:24 +0300
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2012-07-05 22:31:47 +0300
commit: 773288016d00bd72e8e2bcb5b1eec67abb2d9e00 (patch)
tree: 11de66da62a215d726e0498b38d84edf8e426eee
parent: 071589cb3f6b4e259d3b87f228fd1c69b8189037 (diff)
download: postgresql-773288016d00bd72e8e2bcb5b1eec67abb2d9e00.tar.gz
postgresql-773288016d00bd72e8e2bcb5b1eec67abb2d9e00.zip
1 files changed, 62 insertions, 7 deletions
diff --git a/src/pl/plpython/plpy_util.c b/src/pl/plpython/plpy_util.c
index 9a4901ecb2f..bf2953226f4 100644
--- a/src/pl/plpython/plpy_util.c
+++ b/src/pl/plpython/plpy_util.c
@@ -65,16 +65,71 @@ PLyUnicode_Bytes(PyObject *unicode)
 	const char *serverenc;
 
 	/*
-	 * Python understands almost all PostgreSQL encoding names, but it doesn't
-	 * know SQL_ASCII.
+	 * Map PostgreSQL encoding to a Python encoding name.
 	 */
-	if (GetDatabaseEncoding() == PG_SQL_ASCII)
-		serverenc = "ascii";
-	else
-		serverenc = GetDatabaseEncodingName();
+	switch (GetDatabaseEncoding())
+	{
+		case PG_SQL_ASCII:
+			/*
+			 * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's
+			 * 'ascii' means true 7-bit only ASCII, while PostgreSQL's
+			 * SQL_ASCII means that anything is allowed, and the system doesn't
+			 * try to interpret the bytes in any way. But not sure what else
+			 * to do, and we haven't heard any complaints...
+			 */
+			serverenc = "ascii";
+			break;
+		case PG_WIN1250:
+			serverenc = "cp1250";
+			break;
+		case PG_WIN1251:
+			serverenc = "cp1251";
+			break;
+		case PG_WIN1252:
+			serverenc = "cp1252";
+			break;
+		case PG_WIN1253:
+			serverenc = "cp1253";
+			break;
+		case PG_WIN1254:
+			serverenc = "cp1254";
+			break;
+		case PG_WIN1255:
+			serverenc = "cp1255";
+			break;
+		case PG_WIN1256:
+			serverenc = "cp1256";
+			break;
+		case PG_WIN1257:
+			serverenc = "cp1257";
+			break;
+		case PG_WIN1258:
+			serverenc = "cp1258";
+			break;
+		case PG_WIN866:
+			serverenc = "cp866";
+			break;
+		case PG_WIN874:
+			serverenc = "cp874";
+			break;
+		default:
+			/* Other encodings have the same name in Python. */
+			serverenc = GetDatabaseEncodingName();
+			break;
+	}
+
 	rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict");
 	if (rv == NULL)
-		PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");
+	{
+		/*
+		 * Use a plain ereport instead of PLy_elog to avoid recursion, if
+		 * the traceback formatting functions try to do unicode to bytes
+		 * conversion again.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("could not convert Python Unicode object to PostgreSQL server encoding")));
+	}
 	return rv;
 }
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>	2012-07-05 21:45:24 +0300
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>	2012-07-05 22:31:47 +0300
commit	773288016d00bd72e8e2bcb5b1eec67abb2d9e00 (patch)
tree	11de66da62a215d726e0498b38d84edf8e426eee
parent	071589cb3f6b4e259d3b87f228fd1c69b8189037 (diff)
download	postgresql-773288016d00bd72e8e2bcb5b1eec67abb2d9e00.tar.gz postgresql-773288016d00bd72e8e2bcb5b1eec67abb2d9e00.zip