aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
author: Noah Misch <noah@leadboat.com> 2025-05-05 04:52:04 -0700
committer: Noah Misch <noah@leadboat.com> 2025-05-05 04:52:04 -0700
commit: 627acc3caa74caa736b2c5587e944d2ea510ea67 (patch)
tree: 7aed8aec94c841de34fad669bd8b4376ff15c1a3 /src/backend/utils
parent: 5be213caaa1a9a65dfdbbf400b6a53b5e743b8d1 (diff)
download: postgresql-627acc3caa74caa736b2c5587e944d2ea510ea67.tar.gz
download: postgresql-627acc3caa74caa736b2c5587e944d2ea510ea67.zip
With GB18030, prevent SIGSEGV from reading past end of allocation.
With GB18030 as source encoding, applications could crash the server via
SQL functions convert() or convert_from(). Applications themselves could
crash after passing unterminated GB18030 input to libpq functions
PQescapeLiteral(), PQescapeIdentifier(), PQescapeStringConn(), or
PQescapeString(). Extension code could crash by passing unterminated
GB18030 input to jsonapi.h functions. All those functions have been
intended to handle untrusted, unterminated input safely.

A crash required allocating the input such that the last byte of the
allocation was the last byte of a virtual memory page. Some malloc()
implementations take measures against that, making the SIGSEGV hard to
reach. Back-patch to v13 (all supported versions).

Author: Noah Misch <noah@leadboat.com>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Backpatch-through: 13
Security: CVE-2025-4207
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- src/backend/utils/mb/mbutils.c | 18
1 files changed, 13 insertions, 5 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 5ddba5bccb4..308016d7763 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -1087,7 +1087,7 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
}
/*
- * pg_mbcliplen with specified encoding
+ * pg_mbcliplen with specified encoding; string must be valid in encoding
*/
int
pg_encoding_mbcliplen(int encoding, const char *mbstr,
@@ -1692,12 +1692,12 @@ check_encoding_conversion_args(int src_encoding,
* report_invalid_encoding: complain about invalid multibyte character
*
* note: len is remaining length of string, not length of character;
- * len must be greater than zero, as we always examine the first byte.
+ * len must be greater than zero (or we'd neglect initializing "buf").
*/
void
report_invalid_encoding(int encoding, const char *mbstr, int len)
{
- int l = pg_encoding_mblen(encoding, mbstr);
+ int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
char buf[8 * 5 + 1];
char *p = buf;
int j,
@@ -1724,18 +1724,26 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
* report_untranslatable_char: complain about untranslatable character
*
* note: len is remaining length of string, not length of character;
- * len must be greater than zero, as we always examine the first byte.
+ * len must be greater than zero (or we'd neglect initializing "buf").
*/
void
report_untranslatable_char(int src_encoding, int dest_encoding,
const char *mbstr, int len)
{
- int l = pg_encoding_mblen(src_encoding, mbstr);
+ int l;
char buf[8 * 5 + 1];
char *p = buf;
int j,
jlimit;
+ /*
+ * We probably could use plain pg_encoding_mblen(), because
+ * gb18030_to_utf8() verifies before it converts. All conversions should.
+ * For src_encoding!=GB18030, len>0 meets pg_encoding_mblen() needs. Even
+ * so, be defensive, since a buggy conversion might pass invalid data.
+ * This is not a performance-critical path.
+ */
+ l = pg_encoding_mblen_or_incomplete(src_encoding, mbstr, len);
jlimit = Min(l, len);
jlimit = Min(jlimit, 8); /* prevent buffer overrun */