about | summary | refs | log | tree | commit | diff
path: root/src/interfaces/libpq/fe-exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/interfaces/libpq/fe-exec.c')
-rw-r--r-- src/interfaces/libpq/fe-exec.c 73
1 files changed, 33 insertions, 40 deletions
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 3e603c6d7c1..4a4731d048c 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -3978,6 +3978,7 @@ PQescapeStringInternal(PGconn *conn,
const char *source = from;
char *target = to;
size_t remaining = strnlen(from, length);
+ bool already_complained = false;
if (error)
*error = 0;
@@ -4004,67 +4005,59 @@ PQescapeStringInternal(PGconn *conn,
/* Slow path for possible multibyte characters */
charlen = pg_encoding_mblen(encoding, source);
- if (remaining < charlen)
+ if (remaining < charlen ||
+ pg_encoding_verifymbchar(encoding, source, charlen) == -1)
{
/*
- * If the character is longer than the available input, report an
- * error if possible, and replace the string with an invalid
- * sequence. The invalid sequence ensures that the escaped string
- * will trigger an error on the server-side, even if we can't
- * directly report an error here.
+ * Multibyte character is invalid. It's important to verify that,
+ * because an invalid multibyte character could be used to "skip"
+ * over quote characters, e.g. when parsing
+ * character-by-character.
+ *
+ * Report an error if possible, and replace the character's first
+ * byte with an invalid sequence. The invalid sequence ensures
+ * that the escaped string will trigger an error on the
+ * server-side, even if we can't directly report an error here.
*
* This isn't *that* crucial when we can report an error to the
- * caller, but if we can't, the caller will use this string
- * unmodified and it needs to be safe for parsing.
+ * caller; but if we can't or the caller ignores it, the caller
+ * will use this string unmodified and it needs to be safe for
+ * parsing.
*
* We know there's enough space for the invalid sequence because
* the "to" buffer needs to be at least 2 * length + 1 long, and
* at worst we're replacing a single input byte with two invalid
* bytes.
- */
- if (error)
- *error = 1;
- if (conn)
- appendPQExpBufferStr(&conn->errorMessage,
- libpq_gettext("incomplete multibyte character\n"));
-
- pg_encoding_set_invalid(encoding, target);
- target += 2;
-
- /* there's no more input data, so we can stop */
- break;
- }
- else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
- {
- /*
- * Multibyte character is invalid. It's important to verify that
- * as invalid multi-byte characters could e.g. be used to "skip"
- * over quote characters, e.g. when parsing
- * character-by-character.
- *
- * Replace the bytes corresponding to the invalid character with
- * an invalid sequence, for the same reason as above.
*
* It would be a bit faster to verify the whole string the first
* time we encounter a set highbit, but this way we can replace
- * just the invalid characters, which probably makes it easier for
- * users to find the invalidly encoded portion of a larger string.
+ * just the invalid data, which probably makes it easier for users
+ * to find the invalidly encoded portion of a larger string.
*/
if (error)
*error = 1;
- if (conn)
- appendPQExpBufferStr(&conn->errorMessage,
- libpq_gettext("invalid multibyte character\n"));
+ if (conn && !already_complained)
+ {
+ if (remaining < charlen)
+ appendPQExpBufferStr(&conn->errorMessage,
+ libpq_gettext("incomplete multibyte character"));
+ else
+ appendPQExpBufferStr(&conn->errorMessage,
+ libpq_gettext("invalid multibyte character"));
+ /* Issue a complaint only once per string */
+ already_complained = true;
+ }
pg_encoding_set_invalid(encoding, target);
target += 2;
- remaining -= charlen;
/*
- * Copy the rest of the string after the invalid multi-byte
- * character.
+ * Handle the following bytes as if this byte didn't exist. That's
+ * safer in case the subsequent bytes contain important characters
+ * for the caller (e.g. '>' in HTML).
*/
- source += charlen;
+ source++;
+ remaining--;
}
else
{