diff options
author | Jeff Davis <jdavis@postgresql.org> | 2025-01-24 14:56:22 -0800 |
---|---|---|
committer | Jeff Davis <jdavis@postgresql.org> | 2025-01-24 14:56:22 -0800 |
commit | bfc5992069cf00b189af83d96a83ae5ebb65e938 (patch) | |
tree | 94332f38e12deb4a6dcfdc011c42848069190ec5 /src/backend/utils/adt/formatting.c | |
parent | f15538cd27d4eeb7d665263a3d7b5700362d7eb0 (diff) | |
download | postgresql-bfc5992069cf00b189af83d96a83ae5ebb65e938.tar.gz postgresql-bfc5992069cf00b189af83d96a83ae5ebb65e938.zip |
Add SQL function CASEFOLD().
Useful for caseless matching. Similar to LOWER(), but avoids edge-case
problems with using LOWER() for caseless matching.
For collations that support it, CASEFOLD() handles characters with
more than two case variations or multi-character case variations. Some
characters may fold to uppercase. The results of case folding are also
more stable across Unicode versions than LOWER() or UPPER().
Discussion: https://postgr.es/m/a1886ddfcd8f60cb3e905c93009b646b4cfb74c5.camel%40j-davis.com
Reviewed-by: Ian Lawrence Barwick
Diffstat (limited to 'src/backend/utils/adt/formatting.c')
-rw-r--r-- | src/backend/utils/adt/formatting.c | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 7c4c4aa07d5..2720d3902ab 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1820,6 +1820,75 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 }
 
 /*
+ * collation-aware, wide-character-aware case folding (requires UTF8 encoding)
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+str_casefold(const char *buff, size_t nbytes, Oid collid)
+{
+	char	   *result;
+	pg_locale_t mylocale;
+
+	if (!buff)
+		return NULL;
+
+	if (!OidIsValid(collid))
+	{
+		/*
+		 * This typically means that the parser could not resolve a conflict
+		 * of implicit collations, so report it that way.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("could not determine which collation to use for %s function",
+						"casefold()"),
+				 errhint("Use the COLLATE clause to set the collation explicitly.")));
+	}
+
+	if (GetDatabaseEncoding() != PG_UTF8)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("Unicode case folding can only be performed if server encoding is UTF8")));
+
+	mylocale = pg_newlocale_from_collation(collid);
+
+	/* C/POSIX ctype behavior: fold with the ASCII-only lower function */
+	if (mylocale->ctype_is_c)
+	{
+		result = asc_tolower(buff, nbytes);
+	}
+	else
+	{
+		const char *src = buff;
+		size_t		srclen = nbytes;
+		size_t		dstsize;
+		char	   *dst;
+		size_t		needed;
+
+		/* first try buffer of equal size plus terminating NUL */
+		dstsize = srclen + 1;
+		dst = palloc(dstsize);
+
+		needed = pg_strfold(dst, dstsize, src, srclen, mylocale);
+		if (needed + 1 > dstsize)
+		{
+			/* result did not fit: grow to the reported size and retry */
+			dstsize = needed + 1;
+			dst = repalloc(dst, dstsize);
+			needed = pg_strfold(dst, dstsize, src, srclen, mylocale);
+			Assert(needed + 1 <= dstsize);
+		}
+
+		Assert(dst[needed] == '\0');
+		result = dst;
+	}
+
+	return result;
+}
+
+/*
  * ASCII-only lower function
  *
  * We pass the number of bytes so we can pass varlena and char* |