aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/formatting.c
diff options
context:
space:
mode:
authorJeff Davis <jdavis@postgresql.org>2025-01-24 14:56:22 -0800
committerJeff Davis <jdavis@postgresql.org>2025-01-24 14:56:22 -0800
commitbfc5992069cf00b189af83d96a83ae5ebb65e938 (patch)
tree94332f38e12deb4a6dcfdc011c42848069190ec5 /src/backend/utils/adt/formatting.c
parentf15538cd27d4eeb7d665263a3d7b5700362d7eb0 (diff)
downloadpostgresql-bfc5992069cf00b189af83d96a83ae5ebb65e938.tar.gz
postgresql-bfc5992069cf00b189af83d96a83ae5ebb65e938.zip
Add SQL function CASEFOLD().
Useful for caseless matching. Similar to LOWER(), but avoids edge-case problems with using LOWER() for caseless matching. For collations that support it, CASEFOLD() handles characters with more than two case variations or multi-character case variations. Some characters may fold to uppercase. The results of case folding are also more stable across Unicode versions than LOWER() or UPPER(). Discussion: https://postgr.es/m/a1886ddfcd8f60cb3e905c93009b646b4cfb74c5.camel%40j-davis.com Reviewed-by: Ian Lawrence Barwick
Diffstat (limited to 'src/backend/utils/adt/formatting.c')
-rw-r--r--src/backend/utils/adt/formatting.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 7c4c4aa07d5..2720d3902ab 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1820,6 +1820,75 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
}
/*
+ * collation-aware, wide-character-aware case folding
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+str_casefold(const char *buff, size_t nbytes, Oid collid)
+{
+	char	   *result;
+	pg_locale_t mylocale;
+
+	if (!buff)
+		return NULL;
+
+	if (!OidIsValid(collid))
+	{
+		/*
+		 * This typically means that the parser could not resolve a conflict
+		 * of implicit collations, so report it that way.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("could not determine which collation to use for %s function",
+						"casefold()"),
+				 errhint("Use the COLLATE clause to set the collation explicitly.")));
+	}
+
+	/* a non-UTF8 server encoding is an unsupported feature, not bad syntax */
+	if (GetDatabaseEncoding() != PG_UTF8)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("Unicode case folding can only be performed if server encoding is UTF8")));
+
+	mylocale = pg_newlocale_from_collation(collid);
+	/* collations whose ctype is C take the asc_tolower() path */
+	if (mylocale->ctype_is_c)
+	{
+		result = asc_tolower(buff, nbytes);
+	}
+	else
+	{
+		const char *src = buff;
+		size_t		srclen = nbytes;
+		size_t		dstsize;
+		char	   *dst;
+		size_t		needed;
+
+		/* first try buffer of equal size plus terminating NUL */
+		dstsize = srclen + 1;
+		dst = palloc(dstsize);
+
+		/* "needed" is the folded length; the NUL terminator is not counted */
+		needed = pg_strfold(dst, dstsize, src, srclen, mylocale);
+		if (needed + 1 > dstsize)
+		{
+			/* grow buffer if needed and retry */
+			dstsize = needed + 1;
+			dst = repalloc(dst, dstsize);
+			needed = pg_strfold(dst, dstsize, src, srclen, mylocale);
+			Assert(needed + 1 <= dstsize);
+		}
+		Assert(dst[needed] == '\0');
+		result = dst;
+	}
+
+	return result;
+}
+
+/*
* ASCII-only lower function
*
* We pass the number of bytes so we can pass varlena and char*