aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/fe_utils/string_utils.c170
-rw-r--r--src/interfaces/libpq/fe-exec.c134
2 files changed, 236 insertions, 68 deletions
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
index 3000da913da..e4561bafea6 100644
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -104,6 +104,7 @@ fmtIdEnc(const char *rawid, int encoding)
const char *cp;
bool need_quotes = false;
+ size_t remaining = strlen(rawid);
/*
* These checks need to match the identifier production in scan.l. Don't
@@ -117,7 +118,8 @@ fmtIdEnc(const char *rawid, int encoding)
else
{
/* otherwise check the entire string */
- for (cp = rawid; *cp; cp++)
+ cp = rawid;
+ for (size_t i = 0; i < remaining; i++, cp++)
{
if (!((*cp >= 'a' && *cp <= 'z')
|| (*cp >= '0' && *cp <= '9')
@@ -153,17 +155,90 @@ fmtIdEnc(const char *rawid, int encoding)
else
{
appendPQExpBufferChar(id_return, '"');
- for (cp = rawid; *cp; cp++)
+
+ cp = &rawid[0];
+ while (remaining > 0)
{
- /*
- * Did we find a double-quote in the string? Then make this a
- * double double-quote per SQL99. Before, we put in a
- * backslash/double-quote pair. - thomas 2000-08-05
- */
- if (*cp == '"')
- appendPQExpBufferChar(id_return, '"');
- appendPQExpBufferChar(id_return, *cp);
+ int charlen;
+
+ /* Fast path for plain ASCII */
+ if (!IS_HIGHBIT_SET(*cp))
+ {
+ /*
+ * Did we find a double-quote in the string? Then make this a
+ * double double-quote per SQL99. Before, we put in a
+ * backslash/double-quote pair. - thomas 2000-08-05
+ */
+ if (*cp == '"')
+ appendPQExpBufferChar(id_return, '"');
+ appendPQExpBufferChar(id_return, *cp);
+ remaining--;
+ cp++;
+ continue;
+ }
+
+ /* Slow path for possible multibyte characters */
+ charlen = pg_encoding_mblen(encoding, cp);
+
+ if (remaining < charlen)
+ {
+ /*
+ * If the character is longer than the available input,
+ * replace the string with an invalid sequence. The invalid
+ * sequence ensures that the escaped string will trigger an
+ * error on the server-side, even if we can't directly report
+ * an error here.
+ */
+ enlargePQExpBuffer(id_return, 2);
+ pg_encoding_set_invalid(encoding,
+ id_return->data + id_return->len);
+ id_return->len += 2;
+ id_return->data[id_return->len] = '\0';
+
+ /* there's no more input data, so we can stop */
+ break;
+ }
+ else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
+ {
+ /*
+ * Multibyte character is invalid. It's important to verify
+ * that as invalid multi-byte characters could e.g. be used to
+ * "skip" over quote characters, e.g. when parsing
+ * character-by-character.
+ *
+ * Replace the bytes corresponding to the invalid character
+ * with an invalid sequence, for the same reason as above.
+ *
+ * It would be a bit faster to verify the whole string the
+ * first time we encounter a set highbit, but this way we can
+ * replace just the invalid characters, which probably makes
+ * it easier for users to find the invalidly encoded portion
+ * of a larger string.
+ */
+ enlargePQExpBuffer(id_return, 2);
+ pg_encoding_set_invalid(encoding,
+ id_return->data + id_return->len);
+ id_return->len += 2;
+ id_return->data[id_return->len] = '\0';
+
+ /*
+ * Copy the rest of the string after the invalid multi-byte
+ * character.
+ */
+ remaining -= charlen;
+ cp += charlen;
+ }
+ else
+ {
+ for (int i = 0; i < charlen; i++)
+ {
+ appendPQExpBufferChar(id_return, *cp);
+ remaining--;
+ cp++;
+ }
+ }
}
+
appendPQExpBufferChar(id_return, '"');
}
@@ -290,6 +365,7 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
size_t length = strlen(str);
const char *source = str;
char *target;
+ size_t remaining = length;
if (!enlargePQExpBuffer(buf, 2 * length + 2))
return;
@@ -297,10 +373,10 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
target = buf->data + buf->len;
*target++ = '\'';
- while (*source != '\0')
+ while (remaining > 0)
{
char c = *source;
- int len;
+ int charlen;
int i;
/* Fast path for plain ASCII */
@@ -312,39 +388,65 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
/* Copy the character */
*target++ = c;
source++;
+ remaining--;
continue;
}
/* Slow path for possible multibyte characters */
- len = PQmblen(source, encoding);
+ charlen = PQmblen(source, encoding);
- /* Copy the character */
- for (i = 0; i < len; i++)
+ if (remaining < charlen)
{
- if (*source == '\0')
- break;
- *target++ = *source++;
- }
+ /*
+ * If the character is longer than the available input, replace
+ * the string with an invalid sequence. The invalid sequence
+ * ensures that the escaped string will trigger an error on the
+ * server-side, even if we can't directly report an error here.
+ *
+ * We know there's enough space for the invalid sequence because
+ * the "target" buffer is 2 * length + 2 long, and at worst we're
+ * replacing a single input byte with two invalid bytes.
+ */
+ pg_encoding_set_invalid(encoding, target);
+ target += 2;
- /*
- * If we hit premature end of string (ie, incomplete multibyte
- * character), try to pad out to the correct length with spaces. We
- * may not be able to pad completely, but we will always be able to
- * insert at least one pad space (since we'd not have quoted a
- * multibyte character). This should be enough to make a string that
- * the server will error out on.
- */
- if (i < len)
+ /* there's no more valid input data, so we can stop */
+ break;
+ }
+ else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
{
- char *stop = buf->data + buf->maxlen - 2;
+ /*
+ * Multibyte character is invalid. It's important to verify that
+ * as invalid multi-byte characters could e.g. be used to "skip"
+ * over quote characters, e.g. when parsing
+ * character-by-character.
+ *
+ * Replace the bytes corresponding to the invalid character with
+ * an invalid sequence, for the same reason as above.
+ *
+ * It would be a bit faster to verify the whole string the first
+ * time we encounter a set highbit, but this way we can replace
+ * just the invalid characters, which probably makes it easier for
+ * users to find the invalidly encoded portion of a larger string.
+ */
+ pg_encoding_set_invalid(encoding, target);
+ target += 2;
+ remaining -= charlen;
- for (; i < len; i++)
+ /*
+ * Copy the rest of the string after the invalid multi-byte
+ * character.
+ */
+ source += charlen;
+ }
+ else
+ {
+ /* Copy the character */
+ for (i = 0; i < charlen; i++)
{
- if (target >= stop)
- break;
- *target++ = ' ';
+ *target++ = *source++;
+ remaining--;
}
- break;
}
}
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 0d224a8524e..14fc5c77793 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -4075,15 +4075,15 @@ PQescapeStringInternal(PGconn *conn,
{
const char *source = from;
char *target = to;
- size_t remaining = length;
+ size_t remaining = strnlen(from, length);
if (error)
*error = 0;
- while (remaining > 0 && *source != '\0')
+ while (remaining > 0)
{
char c = *source;
- int len;
+ int charlen;
int i;
/* Fast path for plain ASCII */
@@ -4100,38 +4100,76 @@ PQescapeStringInternal(PGconn *conn,
}
/* Slow path for possible multibyte characters */
- len = pg_encoding_mblen(encoding, source);
+ charlen = pg_encoding_mblen(encoding, source);
- /* Copy the character */
- for (i = 0; i < len; i++)
+ if (remaining < charlen)
{
- if (remaining == 0 || *source == '\0')
- break;
- *target++ = *source++;
- remaining--;
- }
+ /*
+ * If the character is longer than the available input, report an
+ * error if possible, and replace the string with an invalid
+ * sequence. The invalid sequence ensures that the escaped string
+ * will trigger an error on the server-side, even if we can't
+ * directly report an error here.
+ *
+ * This isn't *that* crucial when we can report an error to the
+ * caller, but if we can't, the caller will use this string
+ * unmodified and it needs to be safe for parsing.
+ *
+ * We know there's enough space for the invalid sequence because
+ * the "to" buffer needs to be at least 2 * length + 1 long, and
+ * at worst we're replacing a single input byte with two invalid
+ * bytes.
+ */
+ if (error)
+ *error = 1;
+ if (conn)
+ libpq_append_conn_error(conn, "incomplete multibyte character");
- /*
- * If we hit premature end of string (ie, incomplete multibyte
- * character), try to pad out to the correct length with spaces. We
- * may not be able to pad completely, but we will always be able to
- * insert at least one pad space (since we'd not have quoted a
- * multibyte character). This should be enough to make a string that
- * the server will error out on.
- */
- if (i < len)
+ pg_encoding_set_invalid(encoding, target);
+ target += 2;
+
+ /* there's no more input data, so we can stop */
+ break;
+ }
+ else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
{
+ /*
+ * Multibyte character is invalid. It's important to verify that
+ * as invalid multi-byte characters could e.g. be used to "skip"
+ * over quote characters, e.g. when parsing
+ * character-by-character.
+ *
+ * Replace the bytes corresponding to the invalid character with
+ * an invalid sequence, for the same reason as above.
+ *
+ * It would be a bit faster to verify the whole string the first
+ * time we encounter a set highbit, but this way we can replace
+ * just the invalid characters, which probably makes it easier for
+ * users to find the invalidly encoded portion of a larger string.
+ */
if (error)
*error = 1;
if (conn)
- libpq_append_conn_error(conn, "incomplete multibyte character");
- for (; i < len; i++)
+ libpq_append_conn_error(conn, "invalid multibyte character");
+
+ pg_encoding_set_invalid(encoding, target);
+ target += 2;
+ remaining -= charlen;
+
+ /*
+ * Copy the rest of the string after the invalid multi-byte
+ * character.
+ */
+ source += charlen;
+ }
+ else
+ {
+ /* Copy the character */
+ for (i = 0; i < charlen; i++)
{
- if (((size_t) (target - to)) / 2 >= length)
- break;
- *target++ = ' ';
+ *target++ = *source++;
+ remaining--;
}
- break;
}
}
@@ -4186,9 +4224,10 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
char *rp;
int num_quotes = 0; /* single or double, depending on as_ident */
int num_backslashes = 0;
- int input_len;
- int result_size;
+ size_t input_len = strlen(str);
+ size_t result_size;
char quote_char = as_ident ? '"' : '\'';
+ bool validated_mb = false;
/* We must have a connection, else fail immediately. */
if (!conn)
@@ -4197,8 +4236,12 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
if (conn->cmd_queue_head == NULL)
pqClearConnErrorState(conn);
- /* Scan the string for characters that must be escaped. */
- for (s = str; (s - str) < len && *s != '\0'; ++s)
+ /*
+ * Scan the string for characters that must be escaped and for invalidly
+ * encoded data.
+ */
+ s = str;
+ for (size_t remaining = input_len; remaining > 0; remaining--, s++)
{
if (*s == quote_char)
++num_quotes;
@@ -4211,20 +4254,41 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
/* Slow path for possible multibyte characters */
charlen = pg_encoding_mblen(conn->client_encoding, s);
- /* Multibyte character overruns allowable length. */
- if ((s - str) + charlen > len || memchr(s, 0, charlen) != NULL)
+ if (charlen > remaining)
{
+ /* Multibyte character overruns allowable length. */
libpq_append_conn_error(conn, "incomplete multibyte character");
return NULL;
}
+ /*
+ * If we haven't already, check that multibyte characters are
+ * valid. It's important to verify that as invalid multi-byte
+ * characters could e.g. be used to "skip" over quote characters,
+ * e.g. when parsing character-by-character.
+ *
+ * We check validity once, for the whole remainder of the string,
+ * when we first encounter any multi-byte character. Some
+ * encodings have optimized implementations for longer strings.
+ */
+ if (!validated_mb)
+ {
+ if (pg_encoding_verifymbstr(conn->client_encoding, s, remaining)
+ != strlen(s))
+ {
+ libpq_append_conn_error(conn, "invalid multibyte character");
+ return NULL;
+ }
+ validated_mb = true;
+ }
+
/* Adjust s, bearing in mind that for loop will increment it. */
s += charlen - 1;
+ remaining -= charlen - 1;
}
}
/* Allocate output buffer. */
- input_len = s - str;
result_size = input_len + num_quotes + 3; /* two quotes, plus a NUL */
if (!as_ident && num_backslashes > 0)
result_size += num_backslashes + 2;
@@ -4269,7 +4333,8 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
}
else
{
- for (s = str; s - str < input_len; ++s)
+ s = str;
+ for (size_t remaining = input_len; remaining > 0; remaining--, s++)
{
if (*s == quote_char || (!as_ident && *s == '\\'))
{
@@ -4287,6 +4352,7 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
*rp++ = *s;
if (--i == 0)
break;
+ remaining--;
++s; /* for loop will provide the final increment */
}
}