1 files changed, 33 insertions, 40 deletions
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 3e603c6d7c1..4a4731d048c 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -3978,6 +3978,7 @@ PQescapeStringInternal(PGconn *conn,
 	const char *source = from;
 	char	   *target = to;
 	size_t		remaining = strnlen(from, length);
+	bool		already_complained = false;
 
 	if (error)
 		*error = 0;
@@ -4004,67 +4005,59 @@ PQescapeStringInternal(PGconn *conn,
 		/* Slow path for possible multibyte characters */
 		charlen = pg_encoding_mblen(encoding, source);
 
-		if (remaining < charlen)
+		if (remaining < charlen ||
+			pg_encoding_verifymbchar(encoding, source, charlen) == -1)
 		{
 			/*
-			 * If the character is longer than the available input, report an
-			 * error if possible, and replace the string with an invalid
-			 * sequence. The invalid sequence ensures that the escaped string
-			 * will trigger an error on the server-side, even if we can't
-			 * directly report an error here.
+			 * Multibyte character is invalid.  It's important to verify that
+			 * as invalid multibyte characters could e.g. be used to "skip"
+			 * over quote characters, e.g. when parsing
+			 * character-by-character.
+			 *
+			 * Report an error if possible, and replace the character's first
+			 * byte with an invalid sequence. The invalid sequence ensures
+			 * that the escaped string will trigger an error on the
+			 * server-side, even if we can't directly report an error here.
 			 *
 			 * This isn't *that* crucial when we can report an error to the
-			 * caller, but if we can't, the caller will use this string
-			 * unmodified and it needs to be safe for parsing.
+			 * caller; but if we can't or the caller ignores it, the caller
+			 * will use this string unmodified and it needs to be safe for
+			 * parsing.
 			 *
 			 * We know there's enough space for the invalid sequence because
 			 * the "to" buffer needs to be at least 2 * length + 1 long, and
 			 * at worst we're replacing a single input byte with two invalid
 			 * bytes.
-			 */
-			if (error)
-				*error = 1;
-			if (conn)
-				appendPQExpBufferStr(&conn->errorMessage,
-									 libpq_gettext("incomplete multibyte character\n"));
-
-			pg_encoding_set_invalid(encoding, target);
-			target += 2;
-
-			/* there's no more input data, so we can stop */
-			break;
-		}
-		else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
-		{
-			/*
-			 * Multibyte character is invalid.  It's important to verify that
-			 * as invalid multi-byte characters could e.g. be used to "skip"
-			 * over quote characters, e.g. when parsing
-			 * character-by-character.
-			 *
-			 * Replace the bytes corresponding to the invalid character with
-			 * an invalid sequence, for the same reason as above.
 			 *
 			 * It would be a bit faster to verify the whole string the first
 			 * time we encounter a set highbit, but this way we can replace
-			 * just the invalid characters, which probably makes it easier for
-			 * users to find the invalidly encoded portion of a larger string.
+			 * just the invalid data, which probably makes it easier for users
+			 * to find the invalidly encoded portion of a larger string.
 			 */
 			if (error)
 				*error = 1;
-			if (conn)
-				appendPQExpBufferStr(&conn->errorMessage,
-									 libpq_gettext("invalid multibyte character\n"));
+			if (conn && !already_complained)
+			{
+				if (remaining < charlen)
+					appendPQExpBufferStr(&conn->errorMessage,
+										 libpq_gettext("incomplete multibyte character"));
+				else
+					appendPQExpBufferStr(&conn->errorMessage,
+										 libpq_gettext("invalid multibyte character"));
+				/* Issue a complaint only once per string */
+				already_complained = true;
+			}
 
 			pg_encoding_set_invalid(encoding, target);
 			target += 2;
-			remaining -= charlen;
 
 			/*
-			 * Copy the rest of the string after the invalid multi-byte
-			 * character.
+			 * Handle the following bytes as if this byte didn't exist. That's
+			 * safer in case the subsequent bytes contain important characters
+			 * for the caller (e.g. '>' in html).
 			 */
-			source += charlen;
+			source++;
+			remaining--;
 		}
 		else
 		{