aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2000-11-25 20:33:54 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2000-11-25 20:33:54 +0000
commitbbea3643a3a6425f92d0db9ff16c7f73a31a466c (patch)
treef13bea7c027c5e4e5155eb802a16e0e2c1d3a0c8 /src/backend/utils/adt/selfuncs.c
parent0432ce994d3971ced6e336e9f58444d5322c3270 (diff)
downloadpostgresql-bbea3643a3a6425f92d0db9ff16c7f73a31a466c.tar.gz
postgresql-bbea3643a3a6425f92d0db9ff16c7f73a31a466c.zip
Store current LC_COLLATE and LC_CTYPE settings in pg_control during initdb;
re-adopt these settings at every postmaster or standalone-backend startup. This should fix problems with indexes becoming corrupt due to failure to provide consistent locale environment for postmaster at all times. Also, refuse to start up a non-locale-enabled compilation in a database originally initdb'd with a non-C locale. Suppress LIKE index optimization if locale is not "C" or "POSIX" (are there any other locales where it's safe?). Issue NOTICE during initdb if selected locale disables LIKE optimization.
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--src/backend/utils/adt/selfuncs.c57
1 files changed, 53 insertions, 4 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 63e4d9b46d2..8d4c4d080a4 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.82 2000/11/16 22:30:31 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.83 2000/11/25 20:33:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,6 +24,9 @@
#include <ctype.h>
#include <math.h>
+#ifdef USE_LOCALE
+#include <locale.h>
+#endif
#include "access/heapam.h"
#include "catalog/catname.h"
@@ -1581,6 +1584,11 @@ pattern_fixed_prefix(char *patt, Pattern_Type ptype,
*
* A fixed prefix "foo" is estimated as the selectivity of the expression
* "var >= 'foo' AND var < 'fop'" (see also indxqual.c).
+ *
+ * XXX Note: we make use of the upper bound to estimate operator selectivity
+ * even if the locale is such that we cannot rely on the upper-bound string.
+ * The selectivity only needs to be approximately right anyway, so it seems
+ * more useful to use the upper-bound code than not.
*/
static Selectivity
prefix_selectivity(char *prefix,
@@ -1862,6 +1870,44 @@ pattern_selectivity(char *patt, Pattern_Type ptype)
return result;
}
+/*
+ * Test whether the database's LOCALE setting is safe for LIKE/regexp index
+ * optimization. The key requirement here is that given a prefix string,
+ * say "foo", we must be able to generate another string "fop" that is
+ * greater than all strings "foobar" starting with "foo". Unfortunately,
+ * many non-C locales have bizarre collation rules in which "fop" > "foo"
+ * is not sufficient to ensure "fop" > "foobar". Until we can come up
+ * with a more bulletproof way of generating the upper-bound string,
+ * disable the optimization in locales where it is not known to be safe.
+ */
+bool
+locale_is_like_safe(void)
+{
+#ifdef USE_LOCALE
+ /* Cache result so we only have to compute it once */
+ static int result = -1;
+ char *localeptr;
+
+ if (result >= 0)
+ return (bool) result;
+ localeptr = setlocale(LC_COLLATE, NULL);
+ if (!localeptr)
+ elog(STOP, "Invalid LC_COLLATE setting");
+ /*
+ * Currently we accept only "C" and "POSIX" (do any systems still
+ * return "POSIX"?). Which other locales allow safe optimization?
+ */
+ if (strcmp(localeptr, "C") == 0)
+ result = true;
+ else if (strcmp(localeptr, "POSIX") == 0)
+ result = true;
+ else
+ result = false;
+ return (bool) result;
+#else /* not USE_LOCALE */
+ return true; /* We must be in C locale, which is OK */
+#endif /* USE_LOCALE */
+}
/*
* Try to generate a string greater than the given string or any string it is
@@ -1878,9 +1924,12 @@ pattern_selectivity(char *patt, Pattern_Type ptype)
* This could be rather slow in the worst case, but in most cases we won't
* have to try more than one or two strings before succeeding.
*
- * XXX in a sufficiently weird locale, this might produce incorrect results?
- * For example, in German I believe "ss" is treated specially --- if we are
- * given "foos" and return "foot", will this actually be greater than "fooss"?
+ * XXX this is actually not sufficient, since it only copes with the case
+ * where individual characters collate in an order different from their
+ * numeric code assignments. It does not handle cases where there are
+ * cross-character effects, such as specially sorted digraphs, multiple
+ * sort passes, etc. For now, we just shut down the whole thing in locales
+ * that do such things :-(
*/
char *
make_greater_string(const char *str, Oid datatype)