aboutsummaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
author: Peter Eisentraut <peter@eisentraut.org> 2024-12-17 14:04:55 +0100
committer: Peter Eisentraut <peter@eisentraut.org> 2024-12-17 14:04:55 +0100
commit: fb1a18810f07fc3b722392103d67ce8ed188b63d (patch)
tree: ea418d1a2e82d3104644f550c6a71366f3acfa6c /contrib
parent: d3aad4ac57c5592ade77916404e6d8a989a1d6a1 (diff)
download: postgresql-fb1a18810f07fc3b722392103d67ce8ed188b63d.tar.gz
          postgresql-fb1a18810f07fc3b722392103d67ce8ed188b63d.zip
Remove ts_locale.c's lowerstr()

lowerstr() and lowerstr_with_len() in ts_locale.c do the same thing as
str_tolower() that the rest of the system uses, except that the former
don't use the common locale provider framework but instead use the
global libc locale settings.

This patch replaces uses of lowerstr*() with str_tolower(...,
DEFAULT_COLLATION_OID). For instances that use a libc locale
globally, this will result in exactly the same behavior. For
instances that use other locale providers, you now get consistent
behavior and are no longer dependent on the libc locale settings (for
this case; there are others).

Most uses of these functions are for processing dictionary and
configuration files. In those cases, using the default collation
seems appropriate. At least we don't have a more specific collation
available. But the code in contrib/pg_trgm should really depend on
the collation of the columns being processed. This is not done here,
this can be done in a separate patch.

(You can probably construct some edge cases where this change would
create some locale-related upgrade incompatibility, for example if
before you used a combination of ICU and a differently-behaving libc
locale. We can document this in the release notes, but I don't think
there is anything more we can do about this.)

Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/653f3b84-fc87-45a7-9a0c-bfb4fcab3e7d%40eisentraut.org

Diffstat (limited to 'contrib')
-rw-r--r--  contrib/dict_xsyn/dict_xsyn.c  |  6
-rw-r--r--  contrib/pg_trgm/trgm_op.c      |  6
-rw-r--r--  contrib/pg_trgm/trgm_regexp.c  | 16
3 files changed, 17 insertions, 11 deletions
diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c
index f8c0a5bf5c5..2206300f7b5 100644
--- a/contrib/dict_xsyn/dict_xsyn.c
+++ b/contrib/dict_xsyn/dict_xsyn.c
@@ -14,9 +14,11 @@
#include <ctype.h>
+#include "catalog/pg_collation_d.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
+#include "utils/formatting.h"
PG_MODULE_MAGIC;
@@ -93,7 +95,7 @@ read_dictionary(DictSyn *d, const char *filename)
if (*line == '\0')
continue;
- value = lowerstr(line);
+ value = str_tolower(line, strlen(line), DEFAULT_COLLATION_OID);
pfree(line);
pos = value;
@@ -210,7 +212,7 @@ dxsyn_lexize(PG_FUNCTION_ARGS)
{
char *temp = pnstrdup(in, length);
- word.key = lowerstr(temp);
+ word.key = str_tolower(temp, length, DEFAULT_COLLATION_OID);
pfree(temp);
word.value = NULL;
}
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index c509d15ee40..d0833b3e4a1 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -5,12 +5,14 @@
#include <ctype.h>
+#include "catalog/pg_collation_d.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "lib/qunique.h"
#include "miscadmin.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
+#include "utils/formatting.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -303,7 +305,7 @@ generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
{
#ifdef IGNORECASE
- bword = lowerstr_with_len(bword, eword - bword);
+ bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
bytelen = strlen(bword);
#else
bytelen = eword - bword;
@@ -899,7 +901,7 @@ generate_wildcard_trgm(const char *str, int slen)
buf, &bytelen, &charlen)) != NULL)
{
#ifdef IGNORECASE
- buf2 = lowerstr_with_len(buf, bytelen);
+ buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
bytelen = strlen(buf2);
#else
buf2 = buf;
diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
index 75d6d1d4a8d..11488b2bd6e 100644
--- a/contrib/pg_trgm/trgm_regexp.c
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -191,9 +191,11 @@
*/
#include "postgres.h"
+#include "catalog/pg_collation_d.h"
#include "regex/regexport.h"
#include "trgm.h"
#include "tsearch/ts_locale.h"
+#include "utils/formatting.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
#include "varatt.h"
@@ -847,16 +849,16 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
* within each color, since we used the REG_ICASE option; so there's no
* need to process the uppercase version.
*
- * XXX this code is dependent on the assumption that lowerstr() works the
- * same as the regex engine's internal case folding machinery. Might be
- * wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
- * On the other hand, the trigrams in the index were created using
- * lowerstr(), so we're probably screwed if there's any incompatibility
- * anyway.
+ * XXX this code is dependent on the assumption that str_tolower() works
+ * the same as the regex engine's internal case folding machinery. Might
+ * be wiser to expose pg_wc_tolower and test whether c ==
+ * pg_wc_tolower(c). On the other hand, the trigrams in the index were
+ * created using str_tolower(), so we're probably screwed if there's any
+ * incompatibility anyway.
*/
#ifdef IGNORECASE
{
- char *lowerCased = lowerstr(s);
+ char *lowerCased = str_tolower(s, strlen(s), DEFAULT_COLLATION_OID);
if (strcmp(lowerCased, s) != 0)
{