aboutsummaryrefslogtreecommitdiff
path: root/src/fe_utils/string_utils.c
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2025-02-10 10:03:38 -0500
committerAndres Freund <andres@anarazel.de>2025-02-10 10:03:38 -0500
commit61ad93cdd48ecc8c6edf943f4d888a9325b66882 (patch)
tree6d8a4f72c63a8173f4baa9388bc8f7a1621a5214 /src/fe_utils/string_utils.c
parent7d43ca6fe068015b403ffa1762f4df4efdf68b69 (diff)
downloadpostgresql-61ad93cdd48ecc8c6edf943f4d888a9325b66882.tar.gz
postgresql-61ad93cdd48ecc8c6edf943f4d888a9325b66882.zip
Specify the encoding of input to fmtId()
This commit adds fmtIdEnc() and fmtQualifiedIdEnc(), which allow the encoding to be specified as an explicit argument. Additionally, setFmtEncoding() is provided, which defines the encoding to use when no explicit encoding is provided, to avoid breaking all code using fmtId(). All users of fmtId()/fmtQualifiedId() are either converted to the explicit version or a call to setFmtEncoding() has been added. This commit does not yet utilize the now well-defined encoding; that will happen in a subsequent commit. Reviewed-by: Noah Misch <noah@leadboat.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Backpatch-through: 13 Security: CVE-2025-1094
Diffstat (limited to 'src/fe_utils/string_utils.c')
-rw-r--r--src/fe_utils/string_utils.c84
1 files changed, 78 insertions, 6 deletions
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
index 09fd33907dd..3000da913da 100644
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -19,6 +19,7 @@
#include "common/keywords.h"
#include "fe_utils/string_utils.h"
+#include "mb/pg_wchar.h"
static PQExpBuffer defaultGetLocalPQExpBuffer(void);
@@ -26,6 +27,8 @@ static PQExpBuffer defaultGetLocalPQExpBuffer(void);
int quote_all_identifiers = 0;
PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
+static int fmtIdEncoding = -1;
+
/*
* Returns a temporary PQExpBuffer, valid until the next call to the function.
@@ -55,13 +58,47 @@ defaultGetLocalPQExpBuffer(void)
}
/*
+ * Set the encoding that fmtId() and fmtQualifiedId() use.
+ *
+ * This is not safe against multiple connections having different encodings,
+ * but there is no real other way to address the need to know the encoding for
+ * fmtId()/fmtQualifiedId() input for safe escaping. Eventually we should get
+ * rid of fmtId().
+ */
+void
+setFmtEncoding(int encoding)
+{
+ fmtIdEncoding = encoding;
+}
+
+/*
+ * Return the currently configured encoding for fmtId() and fmtQualifiedId().
+ */
+static int
+getFmtEncoding(void)
+{
+ if (fmtIdEncoding != -1)
+ return fmtIdEncoding;
+
+ /*
+ * In assertion builds it seems best to fail hard if the encoding was not
+ * set, to make it easier to find places with missing calls. But in
+ * production builds that seems like a bad idea, thus we instead just
+ * default to UTF-8.
+ */
+ Assert(fmtIdEncoding != -1);
+
+ return PG_UTF8;
+}
+
+/*
* Quotes input string if it's not a legitimate SQL identifier as-is.
*
- * Note that the returned string must be used before calling fmtId again,
+ * Note that the returned string must be used before calling fmtIdEnc again,
* since we re-use the same return buffer each time.
*/
const char *
-fmtId(const char *rawid)
+fmtIdEnc(const char *rawid, int encoding)
{
PQExpBuffer id_return = getLocalPQExpBuffer();
@@ -134,7 +171,24 @@ fmtId(const char *rawid)
}
/*
- * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
+ * Quotes input string if it's not a legitimate SQL identifier as-is.
+ *
+ * Note that the returned string must be used before calling fmtId again,
+ * since we re-use the same return buffer each time.
+ *
+ * NB: This assumes setFmtEncoding() previously has been called to configure
+ * the encoding of rawid. It is preferable to use fmtIdEnc() with an
+ * explicit encoding.
+ */
+const char *
+fmtId(const char *rawid)
+{
+ return fmtIdEnc(rawid, getFmtEncoding());
+}
+
+/*
+ * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as
+ * needed.
*
* Like fmtId, use the result before calling again.
*
@@ -142,7 +196,7 @@ fmtId(const char *rawid)
* use that buffer until we're finished with calling fmtId().
*/
const char *
-fmtQualifiedId(const char *schema, const char *id)
+fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
{
PQExpBuffer id_return;
PQExpBuffer lcl_pqexp = createPQExpBuffer();
@@ -150,9 +204,9 @@ fmtQualifiedId(const char *schema, const char *id)
/* Some callers might fail to provide a schema name */
if (schema && *schema)
{
- appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
+ appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding));
}
- appendPQExpBufferStr(lcl_pqexp, fmtId(id));
+ appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding));
id_return = getLocalPQExpBuffer();
@@ -162,6 +216,24 @@ fmtQualifiedId(const char *schema, const char *id)
return id_return->data;
}
+/*
+ * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
+ *
+ * Like fmtId, use the result before calling again.
+ *
+ * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
+ * use that buffer until we're finished with calling fmtId().
+ *
+ * NB: This assumes setFmtEncoding() previously has been called to configure
+ * the encoding of schema/id. It is preferable to use fmtQualifiedIdEnc()
+ * with an explicit encoding.
+ */
+const char *
+fmtQualifiedId(const char *schema, const char *id)
+{
+ return fmtQualifiedIdEnc(schema, id, getFmtEncoding());
+}
+
/*
* Format a Postgres version number (in the PG_VERSION_NUM integer format