aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/mb/mbutils.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2006-05-21 20:06:18 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2006-05-21 20:06:18 +0000
commit 70794254a1cc8557e4bddafbfdc4ff2567c653ce (patch)
tree 258f3dbb8f56b7ede8a1ff5646722813e99f176f /src/backend/utils/mb/mbutils.c
parent 833e84621cf0ed48f1f5537aa5b569f333e02105 (diff)
download postgresql-70794254a1cc8557e4bddafbfdc4ff2567c653ce.tar.gz
postgresql-70794254a1cc8557e4bddafbfdc4ff2567c653ce.zip
Change the backend to reject strings containing invalidly-encoded multibyte
characters in all cases. Formerly we mostly just threw warnings for invalid input, and failed to detect it at all if no encoding conversion was required. The tighter check is needed to defend against SQL-injection attacks as per CVE-2006-2313 (further details will be published after release). Embedded zero (null) bytes will be rejected as well. The checks are applied during input to the backend (receipt from client or COPY IN), so it no longer seems necessary to check in textin() and related routines; any string arriving at those functions will already have been validated. Conversion failure reporting (for characters with no equivalent in the destination encoding) has been cleaned up and made consistent while at it. Also, fix a few longstanding errors in little-used encoding conversion routines: win1251_to_iso, win866_to_iso, euc_tw_to_big5, euc_tw_to_mic, mic_to_euc_tw were all broken to varying extents. Patches by Tatsuo Ishii and Tom Lane. Thanks to Akio Ishida and Yasuo Ohgaki for identifying the security issues.
Diffstat (limited to 'src/backend/utils/mb/mbutils.c')
-rw-r--r-- src/backend/utils/mb/mbutils.c 61
1 files changed, 49 insertions, 12 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 9718e7e73ee..da5f8e66c57 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.48 2004/10/13 01:25:12 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.48.4.1 2006/05/21 20:06:16 tgl Exp $
*/
#include "postgres.h"
@@ -370,8 +370,49 @@ pg_client_to_server(unsigned char *s, int len)
Assert(DatabaseEncoding);
Assert(ClientEncoding);
- if (ClientEncoding->encoding == DatabaseEncoding->encoding)
+ if (len <= 0)
+ return s;
+
+ if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
+ ClientEncoding->encoding == PG_SQL_ASCII)
+ {
+ /*
+ * No conversion is needed, but we must still validate the data.
+ */
+ (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
return s;
+ }
+
+ if (DatabaseEncoding->encoding == PG_SQL_ASCII)
+ {
+ /*
+ * No conversion is possible, but we must still validate the data,
+ * because the client-side code might have done string escaping
+ * using the selected client_encoding. If the client encoding is
+ * ASCII-safe then we just do a straight validation under that
+ * encoding. For an ASCII-unsafe encoding we have a problem:
+ * we dare not pass such data to the parser but we have no way
+ * to convert it. We compromise by rejecting the data if it
+ * contains any non-ASCII characters.
+ */
+ if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
+ (void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
+ else
+ {
+ int i;
+
+ for (i = 0; i < len; i++)
+ {
+ if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
+ ereport(ERROR,
+ (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+ errmsg("invalid byte value for encoding \"%s\": 0x%02x",
+ pg_enc2name_tbl[PG_SQL_ASCII].name,
+ (unsigned char) s[i])));
+ }
+ }
+ return s;
+ }
return perform_default_encoding_conversion(s, len, true);
}
@@ -385,9 +426,14 @@ pg_server_to_client(unsigned char *s, int len)
Assert(DatabaseEncoding);
Assert(ClientEncoding);
- if (ClientEncoding->encoding == DatabaseEncoding->encoding)
+ if (len <= 0)
return s;
+ if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
+ ClientEncoding->encoding == PG_SQL_ASCII ||
+ DatabaseEncoding->encoding == PG_SQL_ASCII)
+ return s; /* assume data is valid */
+
return perform_default_encoding_conversion(s, len, false);
}
@@ -406,9 +452,6 @@ perform_default_encoding_conversion(unsigned char *src, int len, bool is_client_
dest_encoding;
FmgrInfo *flinfo;
- if (len <= 0)
- return src;
-
if (is_client_to_server)
{
src_encoding = ClientEncoding->encoding;
@@ -425,12 +468,6 @@ perform_default_encoding_conversion(unsigned char *src, int len, bool is_client_
if (flinfo == NULL)
return src;
- if (src_encoding == dest_encoding)
- return src;
-
- if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
- return src;
-
result = palloc(len * 4 + 1);
FunctionCall5(flinfo,