diff options
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 57 |
1 files changed, 53 insertions, 4 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 63e4d9b46d2..8d4c4d080a4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.82 2000/11/16 22:30:31 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.83 2000/11/25 20:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,6 +24,9 @@ #include <ctype.h> #include <math.h> +#ifdef USE_LOCALE +#include <locale.h> +#endif #include "access/heapam.h" #include "catalog/catname.h" @@ -1581,6 +1584,11 @@ pattern_fixed_prefix(char *patt, Pattern_Type ptype, * * A fixed prefix "foo" is estimated as the selectivity of the expression * "var >= 'foo' AND var < 'fop'" (see also indxqual.c). + * + * XXX Note: we make use of the upper bound to estimate operator selectivity + * even if the locale is such that we cannot rely on the upper-bound string. + * The selectivity only needs to be approximately right anyway, so it seems + * more useful to use the upper-bound code than not. */ static Selectivity prefix_selectivity(char *prefix, @@ -1862,6 +1870,44 @@ pattern_selectivity(char *patt, Pattern_Type ptype) return result; } +/* + * Test whether the database's LOCALE setting is safe for LIKE/regexp index + * optimization. The key requirement here is that given a prefix string, + * say "foo", we must be able to generate another string "fop" that is + * greater than all strings "foobar" starting with "foo". Unfortunately, + * many non-C locales have bizarre collation rules in which "fop" > "foo" + * is not sufficient to ensure "fop" > "foobar". Until we can come up + * with a more bulletproof way of generating the upper-bound string, + * disable the optimization in locales where it is not known to be safe. + */ +bool +locale_is_like_safe(void) +{ +#ifdef USE_LOCALE + /* Cache result so we only have to compute it once */ + static int result = -1; + char *localeptr; + + if (result >= 0) + return (bool) result; + localeptr = setlocale(LC_COLLATE, NULL); + if (!localeptr) + elog(STOP, "Invalid LC_COLLATE setting"); + /* + * Currently we accept only "C" and "POSIX" (do any systems still + * return "POSIX"?). Which other locales allow safe optimization? + */ + if (strcmp(localeptr, "C") == 0) + result = true; + else if (strcmp(localeptr, "POSIX") == 0) + result = true; + else + result = false; + return (bool) result; +#else /* not USE_LOCALE */ + return true; /* We must be in C locale, which is OK */ +#endif /* USE_LOCALE */ +} /* * Try to generate a string greater than the given string or any string it is @@ -1878,9 +1924,12 @@ pattern_selectivity(char *patt, Pattern_Type ptype) * This could be rather slow in the worst case, but in most cases we won't * have to try more than one or two strings before succeeding. * - * XXX in a sufficiently weird locale, this might produce incorrect results? - * For example, in German I believe "ss" is treated specially --- if we are - * given "foos" and return "foot", will this actually be greater than "fooss"? + * XXX this is actually not sufficient, since it only copes with the case + * where individual characters collate in an order different from their + * numeric code assignments. It does not handle cases where there are + * cross-character effects, such as specially sorted digraphs, multiple + * sort passes, etc. For now, we just shut down the whole thing in locales + * that do such things :-( */ char * make_greater_string(const char *str, Oid datatype) |