diff options
author | Andres Freund <andres@anarazel.de> | 2025-02-10 10:03:40 -0500 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2025-02-10 10:03:40 -0500 |
commit | db3eb0e8256a7089d16cb6ed1ea7a65654c0e105 (patch) | |
tree | 0ec4af31c90820568052d53b49ed0e31d7f7f5d9 /src/common/wchar.c | |
parent | 00f1a1f665f078f5abadbf8baddc5c187fba80f8 (diff) | |
download | postgresql-db3eb0e8256a7089d16cb6ed1ea7a65654c0e105.tar.gz postgresql-db3eb0e8256a7089d16cb6ed1ea7a65654c0e105.zip |
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly
encoded input. In such cases it can be useful to replace e.g. an incomplete
multi-byte character with bytes that will trigger an error when getting
validated as part of a larger string.
Unfortunately, until now, for some encodings no such sequence existed. For
those encodings this commit removes one previously accepted input combination
- we consider that to be ok, as the chosen bytes are outside of the valid
ranges for the encodings, we just previously failed to detect that.
As we cannot add a new field to pg_wchar_table without breaking ABI, this is
implemented "in-line" in the newly added function.
Author: Noah Misch <noah@leadboat.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Backpatch-through: 13
Security: CVE-2025-1094
Diffstat (limited to 'src/common/wchar.c')
-rw-r--r-- | src/common/wchar.c | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/src/common/wchar.c b/src/common/wchar.c index 2d044ee4ffb..85822b2c3b5 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -16,6 +16,25 @@ /* + * In today's multibyte encodings other than UTF8, this two-byte sequence + * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0. + * + * For historical reasons, several verifychar implementations opt to reject + * this pair specifically. Byte pair range constraints, in encoding + * originator documentation, always excluded this pair. No core conversion + * could translate it. However, longstanding verifychar implementations + * accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate + * pairs not valid per encoding originator documentation. To avoid tightening + * core or non-core conversions in a security patch, we sought this one pair. + * + * PQescapeString() historically used spaces for BYTE1; many other values + * could suffice for BYTE1. + */ +#define NONUTF8_INVALID_BYTE0 (0x8d) +#define NONUTF8_INVALID_BYTE1 (' ') + + +/* * Operations on multi-byte encodings are driven by a table of helper * functions. 
* @@ -1330,6 +1349,11 @@ pg_big5_verifier(const unsigned char *s, int len) if (len < l) return -1; + if (l == 2 && + s[0] == NONUTF8_INVALID_BYTE0 && + s[1] == NONUTF8_INVALID_BYTE1) + return -1; + while (--l > 0) { if (*++s == '\0') @@ -1350,6 +1374,11 @@ pg_gbk_verifier(const unsigned char *s, int len) if (len < l) return -1; + if (l == 2 && + s[0] == NONUTF8_INVALID_BYTE0 && + s[1] == NONUTF8_INVALID_BYTE1) + return -1; + while (--l > 0) { if (*++s == '\0') @@ -1370,6 +1399,11 @@ pg_uhc_verifier(const unsigned char *s, int len) if (len < l) return -1; + if (l == 2 && + s[0] == NONUTF8_INVALID_BYTE0 && + s[1] == NONUTF8_INVALID_BYTE1) + return -1; + while (--l > 0) { if (*++s == '\0') @@ -1497,6 +1531,19 @@ pg_utf8_islegal(const unsigned char *source, int length) /* + * Fills the provided buffer with two bytes such that: + * pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0 + */ +void +pg_encoding_set_invalid(int encoding, char *dst) +{ + Assert(pg_encoding_max_length(encoding) > 1); + + dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0); + dst[1] = NONUTF8_INVALID_BYTE1; +} + +/* *------------------------------------------------------------------- * encoding info table * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h) @@ -1671,5 +1718,11 @@ pg_encoding_max_length(int encoding) { Assert(PG_VALID_ENCODING(encoding)); - return pg_wchar_table[encoding].maxmblen; + /* + * Check for the encoding despite the assert, due to some mingw versions + * otherwise issuing bogus warnings. + */ + return PG_VALID_ENCODING(encoding) ? + pg_wchar_table[encoding].maxmblen : + pg_wchar_table[PG_SQL_ASCII].maxmblen; } |