diff options
Diffstat (limited to 'src/fe_utils/string_utils.c')
-rw-r--r-- | src/fe_utils/string_utils.c | 91 |
1 files changed, 34 insertions, 57 deletions
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c index c6e2770e917..78b3e7ee67b 100644 --- a/src/fe_utils/string_utils.c +++ b/src/fe_utils/string_utils.c @@ -180,40 +180,25 @@ fmtIdEnc(const char *rawid, int encoding) /* Slow path for possible multibyte characters */ charlen = pg_encoding_mblen(encoding, cp); - if (remaining < charlen) - { - /* - * If the character is longer than the available input, - * replace the string with an invalid sequence. The invalid - * sequence ensures that the escaped string will trigger an - * error on the server-side, even if we can't directly report - * an error here. - */ - enlargePQExpBuffer(id_return, 2); - pg_encoding_set_invalid(encoding, - id_return->data + id_return->len); - id_return->len += 2; - id_return->data[id_return->len] = '\0'; - - /* there's no more input data, so we can stop */ - break; - } - else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1) + if (remaining < charlen || + pg_encoding_verifymbchar(encoding, cp, charlen) == -1) { /* * Multibyte character is invalid. It's important to verify - * that as invalid multi-byte characters could e.g. be used to + * that as invalid multibyte characters could e.g. be used to * "skip" over quote characters, e.g. when parsing * character-by-character. * - * Replace the bytes corresponding to the invalid character - * with an invalid sequence, for the same reason as above. + * Replace the character's first byte with an invalid + * sequence. The invalid sequence ensures that the escaped + * string will trigger an error on the server-side, even if we + * can't directly report an error here. * * It would be a bit faster to verify the whole string the * first time we encounter a set highbit, but this way we can - * replace just the invalid characters, which probably makes - * it easier for users to find the invalidly encoded portion - * of a larger string. + * replace just the invalid data, which probably makes it + * easier for users to find the invalidly encoded portion of a + * larger string. */ enlargePQExpBuffer(id_return, 2); pg_encoding_set_invalid(encoding, @@ -222,11 +207,13 @@ fmtIdEnc(const char *rawid, int encoding) id_return->data[id_return->len] = '\0'; /* - * Copy the rest of the string after the invalid multi-byte - * character. + * Handle the following bytes as if this byte didn't exist. + * That's safer in case the subsequent bytes contain + * characters that are significant for the caller (e.g. '>' in + * html). */ - remaining -= charlen; - cp += charlen; + remaining--; + cp++; } else { @@ -395,49 +382,39 @@ appendStringLiteral(PQExpBuffer buf, const char *str, /* Slow path for possible multibyte characters */ charlen = PQmblen(source, encoding); - if (remaining < charlen) - { - /* - * If the character is longer than the available input, replace - * the string with an invalid sequence. The invalid sequence - * ensures that the escaped string will trigger an error on the - * server-side, even if we can't directly report an error here. - * - * We know there's enough space for the invalid sequence because - * the "target" buffer is 2 * length + 2 long, and at worst we're - * replacing a single input byte with two invalid bytes. - */ - pg_encoding_set_invalid(encoding, target); - target += 2; - - /* there's no more valid input data, so we can stop */ - break; - } - else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1) + if (remaining < charlen || + pg_encoding_verifymbchar(encoding, source, charlen) == -1) { /* * Multibyte character is invalid. It's important to verify that - * as invalid multi-byte characters could e.g. be used to "skip" + * as invalid multibyte characters could e.g. be used to "skip" * over quote characters, e.g. when parsing * character-by-character. * - * Replace the bytes corresponding to the invalid character with - * an invalid sequence, for the same reason as above. + * Replace the character's first byte with an invalid sequence. + * The invalid sequence ensures that the escaped string will + * trigger an error on the server-side, even if we can't directly + * report an error here. + * + * We know there's enough space for the invalid sequence because + * the "target" buffer is 2 * length + 2 long, and at worst we're + * replacing a single input byte with two invalid bytes. * * It would be a bit faster to verify the whole string the first * time we encounter a set highbit, but this way we can replace - * just the invalid characters, which probably makes it easier for - * users to find the invalidly encoded portion of a larger string. + * just the invalid data, which probably makes it easier for users + * to find the invalidly encoded portion of a larger string. */ pg_encoding_set_invalid(encoding, target); target += 2; - remaining -= charlen; /* - * Copy the rest of the string after the invalid multi-byte - * character. + * Handle the following bytes as if this byte didn't exist. That's + * safer in case the subsequent bytes contain important characters + * for the caller (e.g. '>' in html). */ - source += charlen; + source++; + remaining--; } else { |