plperl: Skip setting UTF8 flag when in SQL_ASCII encoding

When in SQL_ASCII encoding, strings passed around are not necessarily
UTF8-safe. We had already fixed this in some places, but it looks like
we missed some.

I had to backpatch Peter Eisentraut's a8b92b60 to 9.1 in order for this
patch to cherry-pick more cleanly.

Patch from Alex Hunsaker, tweaked by Kyotaro HORIGUCHI and myself.

Some desultory cleanup and comment addition by me, during patch review.

Per bug report from Christoph Berg in
20120209102116.GA14429@msgid.df7cb.de
author: Alvaro Herrera <alvherre@alvh.no-ip.org> 2012-07-09 17:36:29 -0400
committer: Alvaro Herrera <alvherre@alvh.no-ip.org> 2012-07-10 15:50:58 -0400
commit: fc661f78c6007103604fc43e98622a71400b769f (patch)
tree: 13e87db428d62a65f816781623d8807d1b48c036 /src/pl/plperl/plperl_helpers.h
parent: 1fbe7d377cb6bf070442794a677499cd2e7531cc (diff)
download: postgresql-fc661f78c6007103604fc43e98622a71400b769f.tar.gz
postgresql-fc661f78c6007103604fc43e98622a71400b769f.zip
1 files changed, 51 insertions, 19 deletions
diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h
index d3bdc1b9808..30d8c0bb0b0 100644
--- a/src/pl/plperl/plperl_helpers.h
+++ b/src/pl/plperl/plperl_helpers.h
@@ -3,21 +3,29 @@
 
 /*
  * convert from utf8 to database encoding
+ *
+ * Returns a palloc'ed copy of the original string
  */
 static inline char *
-utf_u2e(const char *utf8_str, size_t len)
+utf_u2e(char *utf8_str, size_t len)
 {
-	int 	    enc = GetDatabaseEncoding();
-
-	char	   *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc);
+	int			enc = GetDatabaseEncoding();
+	char	   *ret;
 
 	/*
-	* when we are a PG_UTF8 or SQL_ASCII database
-	* pg_do_encoding_conversion() will not do any conversion or
-	* verification. we need to do it manually instead.
-	*/
+	 * When we are in a PG_UTF8 or SQL_ASCII database
+	 * pg_do_encoding_conversion() will not do any conversion (which is good)
+	 * or verification (not so much), so we need to run the verification step
+	 * separately.
+	 */
 	if (enc == PG_UTF8 || enc == PG_SQL_ASCII)
-		pg_verify_mbstr_len(PG_UTF8, utf8_str, len, false);
+	{
+		pg_verify_mbstr_len(enc, utf8_str, len, false);
+		ret = utf8_str;
+	}
+	else
+		ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str,
+												 len, PG_UTF8, enc);
 
 	if (ret == utf8_str)
 		ret = pstrdup(ret);
@@ -27,11 +35,15 @@ utf_u2e(const char *utf8_str, size_t len)
 
 /*
  * convert from database encoding to utf8
+ *
+ * Returns a palloc'ed copy of the original string
  */
 static inline char *
 utf_e2u(const char *str)
 {
-	char	   *ret = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), GetDatabaseEncoding(), PG_UTF8);
+	char	   *ret =
+		(char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
+										   GetDatabaseEncoding(), PG_UTF8);
 
 	if (ret == str)
 		ret = pstrdup(ret);
@@ -41,6 +53,8 @@ utf_e2u(const char *str)
 
 /*
  * Convert an SV to a char * in the current database encoding
+ *
+ * Returns a palloc'ed copy of the original string
  */
 static inline char *
 sv2cstr(SV *sv)
@@ -50,7 +64,9 @@ sv2cstr(SV *sv)
 
 	/*
 	 * get a utf8 encoded char * out of perl. *note* it may not be valid utf8!
-	 *
+	 */
+
+	/*
 	 * SvPVutf8() croaks nastily on certain things, like typeglobs and
 	 * readonly objects such as $^V. That's a perl bug - it's not supposed to
 	 * happen. To avoid crashing the backend, we make a copy of the sv before
@@ -62,15 +78,27 @@ sv2cstr(SV *sv)
 		(SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM))
 		sv = newSVsv(sv);
 	else
-		/* increase the reference count so we cant just SvREFCNT_dec() it when
-		 * we are done */
+	{
+		/*
+		 * increase the reference count so we can just SvREFCNT_dec() it when
+		 * we are done
+		 */
 		SvREFCNT_inc_simple_void(sv);
+	}
 
-	val = SvPVutf8(sv, len);
+	/*
+	 * Request the string from Perl, in UTF-8 encoding; but if we're in a
+	 * SQL_ASCII database, just request the byte soup without trying to make it
+	 * UTF8, because that might fail.
+	 */
+	if (GetDatabaseEncoding() == PG_SQL_ASCII)
+		val = SvPV(sv, len);
+	else
+		val = SvPVutf8(sv, len);
 
 	/*
-	 * we use perl's length in the event we had an embedded null byte to ensure
-	 * we error out properly
+	 * Now convert to database encoding.  We use perl's length in the event we
+	 * had an embedded null byte to ensure we error out properly.
 	 */
 	res = utf_u2e(val, len);
 
@@ -84,16 +112,20 @@ sv2cstr(SV *sv)
  * Create a new SV from a string assumed to be in the current database's
  * encoding.
  */
-
 static inline SV *
 cstr2sv(const char *str)
 {
 	SV		   *sv;
-	char	   *utf8_str = utf_e2u(str);
+	char	   *utf8_str;
+
+	/* no conversion when SQL_ASCII */
+	if (GetDatabaseEncoding() == PG_SQL_ASCII)
+		return newSVpv(str, 0);
+
+	utf8_str = utf_e2u(str);
 
 	sv = newSVpv(utf8_str, 0);
 	SvUTF8_on(sv);
-
 	pfree(utf8_str);
 
 	return sv;
author	Alvaro Herrera <alvherre@alvh.no-ip.org>	2012-07-09 17:36:29 -0400
committer	Alvaro Herrera <alvherre@alvh.no-ip.org>	2012-07-10 15:50:58 -0400
commit	fc661f78c6007103604fc43e98622a71400b769f (patch)
tree	13e87db428d62a65f816781623d8807d1b48c036 /src/pl/plperl/plperl_helpers.h
parent	1fbe7d377cb6bf070442794a677499cd2e7531cc (diff)
download	postgresql-fc661f78c6007103604fc43e98622a71400b769f.tar.gz postgresql-fc661f78c6007103604fc43e98622a71400b769f.zip