aboutsummaryrefslogtreecommitdiff
path: root/src/common/string.c
diff options
context:
space:
mode:
authorPeter Eisentraut <peter@eisentraut.org>2022-09-13 16:10:44 +0200
committerPeter Eisentraut <peter@eisentraut.org>2022-09-13 16:10:44 +0200
commit45b1a67a0fcb3f1588df596431871de4c93cb76f (patch)
tree4bb98640fe0cfda9abc5e656c7c2ad7abec610e1 /src/common/string.c
parentda5d4ea5aaac4fc02f2e2aec272efe438dd4e171 (diff)
downloadpostgresql-45b1a67a0fcb3f1588df596431871de4c93cb76f.tar.gz
postgresql-45b1a67a0fcb3f1588df596431871de4c93cb76f.zip
pg_clean_ascii(): escape bytes rather than lose them
Rather than replace each unprintable byte with a '?' character, replace it with a hex escape instead. The API now allocates a copy rather than modifying the input in place. Author: Jacob Champion <jchampion@timescale.com> Discussion: https://www.postgresql.org/message-id/CAAWbhmgsvHrH9wLU2kYc3pOi1KSenHSLAHBbCVmmddW6-mc_=w@mail.gmail.com
Diffstat (limited to 'src/common/string.c')
-rw-r--r--src/common/string.c52
1 files changed, 43 insertions, 9 deletions
diff --git a/src/common/string.c b/src/common/string.c
index 16940d1fa7d..97b3d45d36c 100644
--- a/src/common/string.c
+++ b/src/common/string.c
@@ -22,6 +22,7 @@
#endif
#include "common/string.h"
+#include "lib/stringinfo.h"
/*
@@ -59,9 +60,12 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
/*
- * pg_clean_ascii -- Replace any non-ASCII chars with a '?' char
+ * pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string
*
- * Modifies the string passed in which must be '\0'-terminated.
+ * Makes a newly allocated copy of the string passed in, which must be
+ * '\0'-terminated. In the backend, additional alloc_flags may be provided and
+ * will be passed as-is to palloc_extended(); in the frontend, alloc_flags is
+ * ignored and the copy is malloc'd.
*
* This function exists specifically to deal with filtering out
* non-ASCII characters in a few places where the client can provide an almost
@@ -73,22 +77,52 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
* In general, this function should NOT be used- instead, consider how to handle
* the string without needing to filter out the non-ASCII characters.
*
- * Ultimately, we'd like to improve the situation to not require stripping out
- * all non-ASCII but perform more intelligent filtering which would allow UTF or
+ * Ultimately, we'd like to improve the situation to not require replacing all
+ * non-ASCII but perform more intelligent filtering which would allow UTF or
* similar, but it's unclear exactly what we should allow, so stick to ASCII only
* for now.
*/
-void
-pg_clean_ascii(char *str)
+char *
+pg_clean_ascii(const char *str, int alloc_flags)
{
- /* Only allow clean ASCII chars in the string */
- char *p;
+ size_t dstlen;
+ char *dst;
+ const char *p;
+ size_t i = 0;
+
+ /* Worst case, each byte can become four bytes, plus a null terminator. */
+ dstlen = strlen(str) * 4 + 1;
+
+#ifdef FRONTEND
+ dst = malloc(dstlen);
+#else
+ dst = palloc_extended(dstlen, alloc_flags);
+#endif
+
+ if (!dst)
+ return NULL;
for (p = str; *p != '\0'; p++)
{
+
+ /* Only allow clean ASCII chars in the string */
if (*p < 32 || *p > 126)
- *p = '?';
+ {
+ Assert(i < (dstlen - 3));
+ snprintf(&dst[i], dstlen - i, "\\x%02x", (unsigned char) *p);
+ i += 4;
+ }
+ else
+ {
+ Assert(i < dstlen);
+ dst[i] = *p;
+ i++;
+ }
}
+
+ Assert(i < dstlen);
+ dst[i] = '\0';
+ return dst;
}