diff options
author | Jeff Davis <jdavis@postgresql.org> | 2024-08-06 11:55:21 -0700 |
---|---|---|
committer | Jeff Davis <jdavis@postgresql.org> | 2024-08-06 12:25:12 -0700 |
commit | a890ad214942c9eab1b2f0c6997e7dc114f99e71 (patch) | |
tree | 7d3d4a461249eeb528a5739371c9cb6550daeb28 /src/backend/utils/adt/selfuncs.c | |
parent | a54d4ed183927f15e1853b83106acebeeeee11c8 (diff) | |
download | postgresql-a890ad214942c9eab1b2f0c6997e7dc114f99e71.tar.gz postgresql-a890ad214942c9eab1b2f0c6997e7dc114f99e71.zip |
selfuncs.c: use pg_strxfrm() instead of strxfrm().

pg_strxfrm() takes a pg_locale_t, so it works properly with all
providers. This improves estimates for ICU when performing linear
interpolation within a histogram bin.

Previously, convert_string_datum() always used strxfrm() and relied on
setlocale(). That did not produce good estimates for non-default or
non-libc collations.

Discussion: https://postgr.es/m/89475ee5487d795124f4e25118ea8f1853edb8cb.camel@j-davis.com

Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 13 |
1 file changed, 9 insertions, 4 deletions
```diff
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 877a62a62ec..bf42393bec6 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4639,7 +4639,7 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
  * On failure (e.g., unsupported typid), set *failure to true;
  * otherwise, that variable is not changed. (We'll return NULL on failure.)
  *
- * When using a non-C locale, we must pass the string through strxfrm()
+ * When using a non-C locale, we must pass the string through pg_strxfrm()
  * before continuing, so as to generate correct locale-specific results.
  */
 static char *
@@ -4673,20 +4673,25 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
 
 	if (!lc_collate_is_c(collid))
 	{
+		pg_locale_t mylocale = pg_newlocale_from_collation(collid);
 		char *xfrmstr;
 		size_t xfrmlen;
 		size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
 
 		/*
 		 * XXX: We could guess at a suitable output buffer size and only call
-		 * strxfrm twice if our guess is too small.
+		 * pg_strxfrm() twice if our guess is too small.
 		 *
 		 * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
 		 * bogus data or set an error. This is not really a problem unless it
 		 * crashes since it will only give an estimation error and nothing
 		 * fatal.
+		 *
+		 * XXX: we do not check pg_strxfrm_enabled(). On some platforms and in
+		 * some cases, libc strxfrm() may return the wrong results, but that
+		 * will only lead to an estimation error.
 		 */
-		xfrmlen = strxfrm(NULL, val, 0);
+		xfrmlen = pg_strxfrm(NULL, val, 0, mylocale);
 #ifdef WIN32
 
 		/*
@@ -4698,7 +4703,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
 			return val;
 #endif
 		xfrmstr = (char *) palloc(xfrmlen + 1);
-		xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+		xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale);
 
 		/*
 		 * Some systems (e.g., glibc) can return a smaller value from the
```