diff options
author | Andres Freund <andres@anarazel.de> | 2025-02-10 10:03:39 -0500 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2025-02-10 10:03:39 -0500 |
commit | 92e4170f421832208a75645f4a4ea94494bdad4d (patch) | |
tree | 98fea6b80aa818e6073f0fed8b4a3be40fab5d7b /src | |
parent | 56aa2dcddeb299f62dad98ba50ad8db47cddaf97 (diff) | |
download | postgresql-92e4170f421832208a75645f4a4ea94494bdad4d.tar.gz postgresql-92e4170f421832208a75645f4a4ea94494bdad4d.zip |
Fix handling of invalidly encoded data in escaping functions
Previously invalidly encoded input to various escaping functions could lead to
the escaped string getting incorrectly parsed by psql. To be safe, escaping
functions need to ensure that neither invalid nor incomplete multi-byte
characters can be used to "escape" from being quoted.
Functions which can report errors now return an error in more cases than
before. Functions that cannot report errors now replace invalid input bytes
with a byte sequence that cannot be used to escape the quotes and that is
guaranteed to error out when a query is sent to the server.
The following functions are fixed by this commit:
- PQescapeLiteral()
- PQescapeIdentifier()
- PQescapeString()
- PQescapeStringConn()
- fmtId()
- appendStringLiteral()
Reported-by: Stephen Fewer <stephen_fewer@rapid7.com>
Reviewed-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Backpatch-through: 13
Security: CVE-2025-1094
Diffstat (limited to 'src')
-rw-r--r-- | src/fe_utils/string_utils.c | 170 | ||||
-rw-r--r-- | src/interfaces/libpq/fe-exec.c | 134 |
2 files changed, 236 insertions, 68 deletions
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c index 2e2b0d18af4..75b4f777db5 100644 --- a/src/fe_utils/string_utils.c +++ b/src/fe_utils/string_utils.c @@ -104,6 +104,7 @@ fmtIdEnc(const char *rawid, int encoding) const char *cp; bool need_quotes = false; + size_t remaining = strlen(rawid); /* * These checks need to match the identifier production in scan.l. Don't @@ -117,7 +118,8 @@ fmtIdEnc(const char *rawid, int encoding) else { /* otherwise check the entire string */ - for (cp = rawid; *cp; cp++) + cp = rawid; + for (size_t i = 0; i < remaining; i++, cp++) { if (!((*cp >= 'a' && *cp <= 'z') || (*cp >= '0' && *cp <= '9') @@ -153,17 +155,90 @@ fmtIdEnc(const char *rawid, int encoding) else { appendPQExpBufferChar(id_return, '"'); - for (cp = rawid; *cp; cp++) + + cp = &rawid[0]; + while (remaining > 0) { - /* - * Did we find a double-quote in the string? Then make this a - * double double-quote per SQL99. Before, we put in a - * backslash/double-quote pair. - thomas 2000-08-05 - */ - if (*cp == '"') - appendPQExpBufferChar(id_return, '"'); - appendPQExpBufferChar(id_return, *cp); + int charlen; + + /* Fast path for plain ASCII */ + if (!IS_HIGHBIT_SET(*cp)) + { + /* + * Did we find a double-quote in the string? Then make this a + * double double-quote per SQL99. Before, we put in a + * backslash/double-quote pair. - thomas 2000-08-05 + */ + if (*cp == '"') + appendPQExpBufferChar(id_return, '"'); + appendPQExpBufferChar(id_return, *cp); + remaining--; + cp++; + continue; + } + + /* Slow path for possible multibyte characters */ + charlen = pg_encoding_mblen(encoding, cp); + + if (remaining < charlen) + { + /* + * If the character is longer than the available input, + * replace the string with an invalid sequence. The invalid + * sequence ensures that the escaped string will trigger an + * error on the server-side, even if we can't directly report + * an error here. + */ + enlargePQExpBuffer(id_return, 2); + pg_encoding_set_invalid(encoding, + id_return->data + id_return->len); + id_return->len += 2; + id_return->data[id_return->len] = '\0'; + + /* there's no more input data, so we can stop */ + break; + } + else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1) + { + /* + * Multibyte character is invalid. It's important to verify + * that as invalid multi-byte characters could e.g. be used to + * "skip" over quote characters, e.g. when parsing + * character-by-character. + * + * Replace the bytes corresponding to the invalid character + * with an invalid sequence, for the same reason as above. + * + * It would be a bit faster to verify the whole string the + * first time we encounter a set highbit, but this way we can + * replace just the invalid characters, which probably makes + * it easier for users to find the invalidly encoded portion + * of a larger string. + */ + enlargePQExpBuffer(id_return, 2); + pg_encoding_set_invalid(encoding, + id_return->data + id_return->len); + id_return->len += 2; + id_return->data[id_return->len] = '\0'; + + /* + * Copy the rest of the string after the invalid multi-byte + * character. + */ + remaining -= charlen; + cp += charlen; + } + else + { + for (int i = 0; i < charlen; i++) + { + appendPQExpBufferChar(id_return, *cp); + remaining--; + cp++; + } + } } + appendPQExpBufferChar(id_return, '"'); } @@ -290,6 +365,7 @@ appendStringLiteral(PQExpBuffer buf, const char *str, size_t length = strlen(str); const char *source = str; char *target; + size_t remaining = length; if (!enlargePQExpBuffer(buf, 2 * length + 2)) return; @@ -297,10 +373,10 @@ appendStringLiteral(PQExpBuffer buf, const char *str, target = buf->data + buf->len; *target++ = '\''; - while (*source != '\0') + while (remaining > 0) { char c = *source; - int len; + int charlen; int i; /* Fast path for plain ASCII */ @@ -312,39 +388,65 @@ appendStringLiteral(PQExpBuffer buf, const char *str, /* Copy the character */ *target++ = c; source++; + remaining--; continue; } /* Slow path for possible multibyte characters */ - len = PQmblen(source, encoding); + charlen = PQmblen(source, encoding); - /* Copy the character */ - for (i = 0; i < len; i++) + if (remaining < charlen) { - if (*source == '\0') - break; - *target++ = *source++; - } + /* + * If the character is longer than the available input, replace + * the string with an invalid sequence. The invalid sequence + * ensures that the escaped string will trigger an error on the + * server-side, even if we can't directly report an error here. + * + * We know there's enough space for the invalid sequence because + * the "target" buffer is 2 * length + 2 long, and at worst we're + * replacing a single input byte with two invalid bytes. + */ + pg_encoding_set_invalid(encoding, target); + target += 2; - /* - * If we hit premature end of string (ie, incomplete multibyte - * character), try to pad out to the correct length with spaces. We - * may not be able to pad completely, but we will always be able to - * insert at least one pad space (since we'd not have quoted a - * multibyte character). This should be enough to make a string that - * the server will error out on. - */ - if (i < len) + /* there's no more valid input data, so we can stop */ + break; + } + else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1) { - char *stop = buf->data + buf->maxlen - 2; + /* + * Multibyte character is invalid. It's important to verify that + * as invalid multi-byte characters could e.g. be used to "skip" + * over quote characters, e.g. when parsing + * character-by-character. + * + * Replace the bytes corresponding to the invalid character with + * an invalid sequence, for the same reason as above. + * + * It would be a bit faster to verify the whole string the first + * time we encounter a set highbit, but this way we can replace + * just the invalid characters, which probably makes it easier for + * users to find the invalidly encoded portion of a larger string. + */ + pg_encoding_set_invalid(encoding, target); + target += 2; + remaining -= charlen; - for (; i < len; i++) + /* + * Copy the rest of the string after the invalid multi-byte + * character. + */ + source += charlen; + } + else + { + /* Copy the character */ + for (i = 0; i < charlen; i++) { - if (target >= stop) - break; - *target++ = ' '; + *target++ = *source++; + remaining--; } - break; } } diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index fa9d6aaddf5..ed249fb9689 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -3941,15 +3941,15 @@ PQescapeStringInternal(PGconn *conn, { const char *source = from; char *target = to; - size_t remaining = length; + size_t remaining = strnlen(from, length); if (error) *error = 0; - while (remaining > 0 && *source != '\0') + while (remaining > 0) { char c = *source; - int len; + int charlen; int i; /* Fast path for plain ASCII */ @@ -3966,38 +3966,76 @@ PQescapeStringInternal(PGconn *conn, } /* Slow path for possible multibyte characters */ - len = pg_encoding_mblen(encoding, source); + charlen = pg_encoding_mblen(encoding, source); - /* Copy the character */ - for (i = 0; i < len; i++) + if (remaining < charlen) { - if (remaining == 0 || *source == '\0') - break; - *target++ = *source++; - remaining--; - } + /* + * If the character is longer than the available input, report an + * error if possible, and replace the string with an invalid + * sequence. The invalid sequence ensures that the escaped string + * will trigger an error on the server-side, even if we can't + * directly report an error here. + * + * This isn't *that* crucial when we can report an error to the + * caller, but if we can't, the caller will use this string + * unmodified and it needs to be safe for parsing. + * + * We know there's enough space for the invalid sequence because + * the "to" buffer needs to be at least 2 * length + 1 long, and + * at worst we're replacing a single input byte with two invalid + * bytes. + */ + if (error) + *error = 1; + if (conn) + libpq_append_conn_error(conn, "incomplete multibyte character"); - /* - * If we hit premature end of string (ie, incomplete multibyte - * character), try to pad out to the correct length with spaces. We - * may not be able to pad completely, but we will always be able to - * insert at least one pad space (since we'd not have quoted a - * multibyte character). This should be enough to make a string that - * the server will error out on. - */ - if (i < len) + pg_encoding_set_invalid(encoding, target); + target += 2; + + /* there's no more input data, so we can stop */ + break; + } + else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1) { + /* + * Multibyte character is invalid. It's important to verify that + * as invalid multi-byte characters could e.g. be used to "skip" + * over quote characters, e.g. when parsing + * character-by-character. + * + * Replace the bytes corresponding to the invalid character with + * an invalid sequence, for the same reason as above. + * + * It would be a bit faster to verify the whole string the first + * time we encounter a set highbit, but this way we can replace + * just the invalid characters, which probably makes it easier for + * users to find the invalidly encoded portion of a larger string. + */ if (error) *error = 1; if (conn) - libpq_append_conn_error(conn, "incomplete multibyte character"); - for (; i < len; i++) + libpq_append_conn_error(conn, "invalid multibyte character"); + + pg_encoding_set_invalid(encoding, target); + target += 2; + remaining -= charlen; + + /* + * Copy the rest of the string after the invalid multi-byte + * character. + */ + source += charlen; + } + else + { + /* Copy the character */ + for (i = 0; i < charlen; i++) { - if (((size_t) (target - to)) / 2 >= length) - break; - *target++ = ' '; + *target++ = *source++; + remaining--; } - break; } } @@ -4052,9 +4090,10 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) char *rp; int num_quotes = 0; /* single or double, depending on as_ident */ int num_backslashes = 0; - int input_len; - int result_size; + size_t input_len = strlen(str); + size_t result_size; char quote_char = as_ident ? '"' : '\''; + bool validated_mb = false; /* We must have a connection, else fail immediately. */ if (!conn) @@ -4063,8 +4102,12 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) if (conn->cmd_queue_head == NULL) pqClearConnErrorState(conn); - /* Scan the string for characters that must be escaped. */ - for (s = str; (s - str) < len && *s != '\0'; ++s) + /* + * Scan the string for characters that must be escaped and for invalidly + * encoded data. + */ + s = str; + for (size_t remaining = input_len; remaining > 0; remaining--, s++) { if (*s == quote_char) ++num_quotes; @@ -4077,20 +4120,41 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) /* Slow path for possible multibyte characters */ charlen = pg_encoding_mblen(conn->client_encoding, s); - /* Multibyte character overruns allowable length. */ - if ((s - str) + charlen > len || memchr(s, 0, charlen) != NULL) + if (charlen > remaining) { + /* Multibyte character overruns allowable length. */ libpq_append_conn_error(conn, "incomplete multibyte character"); return NULL; } + /* + * If we haven't already, check that multibyte characters are + * valid. It's important to verify that as invalid multi-byte + * characters could e.g. be used to "skip" over quote characters, + * e.g. when parsing character-by-character. + * + * We check validity once, for the whole remainder of the string, + * when we first encounter any multi-byte character. Some + * encodings have optimized implementations for longer strings. + */ + if (!validated_mb) + { + if (pg_encoding_verifymbstr(conn->client_encoding, s, remaining) + != strlen(s)) + { + libpq_append_conn_error(conn, "invalid multibyte character"); + return NULL; + } + validated_mb = true; + } + /* Adjust s, bearing in mind that for loop will increment it. */ s += charlen - 1; + remaining -= charlen - 1; } } /* Allocate output buffer. */ - input_len = s - str; result_size = input_len + num_quotes + 3; /* two quotes, plus a NUL */ if (!as_ident && num_backslashes > 0) result_size += num_backslashes + 2; @@ -4135,7 +4199,8 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) } else { - for (s = str; s - str < input_len; ++s) + s = str; + for (size_t remaining = input_len; remaining > 0; remaining--, s++) { if (*s == quote_char || (!as_ident && *s == '\\')) { @@ -4153,6 +4218,7 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) *rp++ = *s; if (--i == 0) break; + remaining--; ++s; /* for loop will provide the final increment */ } } |