aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2011-03-20 12:43:39 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2011-03-20 12:44:13 -0400
commit 176d5bae1d636fc1e91840b12cbd04c96d638b7e (patch)
tree f861d3f9d9eb2bead0cd932e7825271fb1fbc1e1 /src
parent c2f4ea469b52e6f7fedff651a4aa0acced873a5f (diff)
download postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.tar.gz
postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.zip
Fix up handling of C/POSIX collations.
Install just one instance of the "C" and "POSIX" collations into pg_collation, rather than one per encoding. Make these instances exist and do something useful even on machines without locale_t support: to wit, it's now possible to force comparisons and case-folding functions to use C locale in an otherwise non-C database, whether or not the platform has support for using any additional collations. Fix up severely broken upper/lower/initcap functions, too: the C/POSIX fastpath now does what it is supposed to, and non-default collations are handled correctly in single-byte database encodings. Merge the two separate collation hashtables that were being maintained in pg_locale.c, and be more wary of the possibility that we fail partway through filling a cache entry.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/formatting.c 163
-rw-r--r-- src/backend/utils/adt/pg_locale.c 488
-rw-r--r-- src/bin/initdb/initdb.c 18
-rw-r--r-- src/include/catalog/catversion.h 2
-rw-r--r-- src/include/catalog/pg_collation.h 15
-rw-r--r-- src/include/port.h 2
-rw-r--r-- src/port/pgstrcasecmp.c 26
7 files changed, 461 insertions, 253 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index aba11459bb1..54783103a2c 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1462,10 +1462,16 @@ str_numth(char *dest, char *num, int type)
* in multibyte character sets. Note that in either case we are effectively
* assuming that the database character encoding matches the encoding implied
* by LC_CTYPE.
+ *
+ * If the system provides locale_t and associated functions (which are
+ * standardized by Open Group's XBD), we can support collations that are
+ * neither default nor C. The code is written to handle all combinations
+ * of have-wide-characters and have-locale_t, though it's rather unlikely
+ * a platform would have the latter without the former.
*/
/*
- * wide-character-aware lower function
+ * collation-aware, wide-character-aware lower function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
@@ -1474,21 +1480,31 @@ char *
str_tolower(const char *buff, size_t nbytes, Oid collid)
{
char *result;
- pg_locale_t mylocale = 0;
if (!buff)
return NULL;
- if (collid != DEFAULT_COLLATION_OID)
- mylocale = pg_newlocale_from_collation(collid);
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+ for (p = result; *p; p++)
+ *p = pg_ascii_tolower((unsigned char) *p);
+ }
#ifdef USE_WIDE_UPPER_LOWER
- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+ else if (pg_database_encoding_max_length() > 1)
{
+ pg_locale_t mylocale = 0;
wchar_t *workspace;
size_t curr_char;
size_t result_size;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
@@ -1501,12 +1517,14 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
else
#endif
- workspace[curr_char] = towlower(workspace[curr_char]);
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ }
/* Make result large enough; case change might change number of bytes */
result_size = curr_char * pg_database_encoding_max_length() + 1;
@@ -1515,22 +1533,40 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
wchar2char(result, workspace, result_size, collid);
pfree(workspace);
}
- else
#endif /* USE_WIDE_UPPER_LOWER */
+ else
{
+ pg_locale_t mylocale = 0;
char *p;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
result = pnstrdup(buff, nbytes);
+ /*
+ * Note: we assume that tolower_l() will not be so broken as to need
+ * an isupper_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
for (p = result; *p; p++)
- *p = pg_tolower((unsigned char) *p);
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ *p = tolower_l((unsigned char) *p, mylocale);
+ else
+#endif
+ *p = pg_tolower((unsigned char) *p);
+ }
}
return result;
}
/*
- * wide-character-aware upper function
+ * collation-aware, wide-character-aware upper function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
@@ -1539,21 +1575,31 @@ char *
str_toupper(const char *buff, size_t nbytes, Oid collid)
{
char *result;
- pg_locale_t mylocale = 0;
if (!buff)
return NULL;
- if (collid != DEFAULT_COLLATION_OID)
- mylocale = pg_newlocale_from_collation(collid);
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ char *p;
+ result = pnstrdup(buff, nbytes);
+
+ for (p = result; *p; p++)
+ *p = pg_ascii_toupper((unsigned char) *p);
+ }
#ifdef USE_WIDE_UPPER_LOWER
- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+ else if (pg_database_encoding_max_length() > 1)
{
+ pg_locale_t mylocale = 0;
wchar_t *workspace;
size_t curr_char;
size_t result_size;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
@@ -1566,12 +1612,14 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
else
#endif
- workspace[curr_char] = towupper(workspace[curr_char]);
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ }
/* Make result large enough; case change might change number of bytes */
result_size = curr_char * pg_database_encoding_max_length() + 1;
@@ -1580,22 +1628,40 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
wchar2char(result, workspace, result_size, collid);
pfree(workspace);
}
- else
#endif /* USE_WIDE_UPPER_LOWER */
+ else
{
+ pg_locale_t mylocale = 0;
char *p;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
result = pnstrdup(buff, nbytes);
+ /*
+ * Note: we assume that toupper_l() will not be so broken as to need
+ * an islower_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
for (p = result; *p; p++)
- *p = pg_toupper((unsigned char) *p);
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ *p = toupper_l((unsigned char) *p, mylocale);
+ else
+#endif
+ *p = pg_toupper((unsigned char) *p);
+ }
}
return result;
}
/*
- * wide-character-aware initcap function
+ * collation-aware, wide-character-aware initcap function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
@@ -1605,21 +1671,42 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
{
char *result;
int wasalnum = false;
- pg_locale_t mylocale = 0;
if (!buff)
return NULL;
- if (collid != DEFAULT_COLLATION_OID)
- mylocale = pg_newlocale_from_collation(collid);
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+ for (p = result; *p; p++)
+ {
+ char c;
+
+ if (wasalnum)
+ *p = c = pg_ascii_tolower((unsigned char) *p);
+ else
+ *p = c = pg_ascii_toupper((unsigned char) *p);
+ /* we don't trust isalnum() here */
+ wasalnum = ((c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '9'));
+ }
+ }
#ifdef USE_WIDE_UPPER_LOWER
- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+ else if (pg_database_encoding_max_length() > 1)
{
+ pg_locale_t mylocale = 0;
wchar_t *workspace;
size_t curr_char;
size_t result_size;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
@@ -1660,20 +1747,44 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
wchar2char(result, workspace, result_size, collid);
pfree(workspace);
}
- else
#endif /* USE_WIDE_UPPER_LOWER */
+ else
{
+ pg_locale_t mylocale = 0;
char *p;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
result = pnstrdup(buff, nbytes);
+ /*
+ * Note: we assume that toupper_l()/tolower_l() will not be so broken
+ * as to need guard tests. When using the default collation, we apply
+ * the traditional Postgres behavior that forces ASCII-style treatment
+ * of I/i, but in non-default collations you get exactly what the
+ * collation says.
+ */
for (p = result; *p; p++)
{
- if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ {
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, mylocale);
+ else
+ *p = toupper_l((unsigned char) *p, mylocale);
+ wasalnum = isalnum_l((unsigned char) *p, mylocale);
+ }
else
- *p = pg_toupper((unsigned char) *p);
- wasalnum = isalnum((unsigned char) *p);
+#endif
+ {
+ if (wasalnum)
+ *p = pg_tolower((unsigned char) *p);
+ else
+ *p = pg_toupper((unsigned char) *p);
+ wasalnum = isalnum((unsigned char) *p);
+ }
}
}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 2b9b321b263..15d347c4f89 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -99,15 +99,24 @@ static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
static char lc_time_envbuf[LC_ENV_BUFSIZE];
+/*
+ * Cache for collation-related knowledge
+ *
+ * One entry per collation OID, kept in the collation_cache hash table.
+ * collate_is_c and ctype_is_c are meaningful only once flags_valid is true;
+ * locale stays 0 until a locale_t has been created for the collation.
+ */
+
+typedef struct
+{
+ Oid collid; /* hash key: pg_collation OID */
+ bool collate_is_c; /* is collation's LC_COLLATE C? */
+ bool ctype_is_c; /* is collation's LC_CTYPE C? */
+ bool flags_valid; /* true if above flags are valid */
+ pg_locale_t locale; /* locale_t struct, or 0 if not valid */
+} collation_cache_entry;
+
+static HTAB *collation_cache = NULL;
+
+
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *); /* MSVC specific */
#endif
-static HTAB *locale_cness_cache = NULL;
-#ifdef HAVE_LOCALE_T
-static HTAB *locale_t_cache = NULL;
-#endif
-
/*
* pg_perm_setlocale
@@ -313,136 +322,6 @@ locale_messages_assign(const char *value, bool doit, GucSource source)
/*
- * We'd like to cache whether LC_COLLATE or LC_CTYPE is C (or POSIX),
- * so we can optimize a few code paths in various places.
- *
- * Note that some code relies on this not reporting false negatives
- * (that is, saying it's not C when it is). For example, char2wchar()
- * could fail if the locale is C, so str_tolower() shouldn't call it
- * in that case.
- */
-
-struct locale_cness_cache_entry
-{
- Oid collid;
- bool collate_is_c;
- bool ctype_is_c;
-};
-
-static void
-init_locale_cness_cache(void)
-{
- HASHCTL ctl;
-
- memset(&ctl, 0, sizeof(ctl));
- ctl.keysize = sizeof(Oid);
- ctl.entrysize = sizeof(struct locale_cness_cache_entry);
- ctl.hash = oid_hash;
- locale_cness_cache = hash_create("locale C-ness cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION);
-}
-
-/*
- * Handle caching of locale "C-ness" for nondefault collation objects.
- * Relying on the system cache directly isn't fast enough.
- */
-static bool
-lookup_collation_cness(Oid collation, int category)
-{
- struct locale_cness_cache_entry *cache_entry;
- bool found;
- HeapTuple tp;
- char *localeptr;
-
- Assert(OidIsValid(collation));
- Assert(category == LC_COLLATE || category == LC_CTYPE);
-
- if (!locale_cness_cache)
- init_locale_cness_cache();
-
- cache_entry = hash_search(locale_cness_cache, &collation, HASH_ENTER, &found);
- if (found)
- {
- if (category == LC_COLLATE)
- return cache_entry->collate_is_c;
- else
- return cache_entry->ctype_is_c;
- }
-
- tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
- if (!HeapTupleIsValid(tp))
- elog(ERROR, "cache lookup failed for collation %u", collation);
-
- localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate);
- cache_entry->collate_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0);
-
- localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype);
- cache_entry->ctype_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0);
-
- ReleaseSysCache(tp);
-
- return category == LC_COLLATE ? cache_entry->collate_is_c : cache_entry->ctype_is_c;
-}
-
-
-bool
-lc_collate_is_c(Oid collation)
-{
- /* Cache result so we only have to compute it once */
- static int result = -1;
- char *localeptr;
-
- if (!OidIsValid(collation))
- return false;
-
- if (collation != DEFAULT_COLLATION_OID)
- return lookup_collation_cness(collation, LC_COLLATE);
-
- if (result >= 0)
- return (bool) result;
- localeptr = setlocale(LC_COLLATE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_COLLATE setting");
-
- if (strcmp(localeptr, "C") == 0)
- result = true;
- else if (strcmp(localeptr, "POSIX") == 0)
- result = true;
- else
- result = false;
- return (bool) result;
-}
-
-
-bool
-lc_ctype_is_c(Oid collation)
-{
- /* Cache result so we only have to compute it once */
- static int result = -1;
- char *localeptr;
-
- if (!OidIsValid(collation))
- return false;
-
- if (collation != DEFAULT_COLLATION_OID)
- return lookup_collation_cness(collation, LC_CTYPE);
-
- if (result >= 0)
- return (bool) result;
- localeptr = setlocale(LC_CTYPE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_CTYPE setting");
-
- if (strcmp(localeptr, "C") == 0)
- result = true;
- else if (strcmp(localeptr, "POSIX") == 0)
- result = true;
- else
- result = false;
- return (bool) result;
-}
-
-
-/*
* Frees the malloced content of a struct lconv. (But not the struct
* itself.)
*/
@@ -844,116 +723,295 @@ IsoLocaleName(const char *winlocname)
#endif /* WIN32 && LC_MESSAGES */
-#ifdef HAVE_LOCALE_T
-struct locale_t_cache_entry
+/*
+ * Cache mechanism for collation information.
+ *
+ * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
+ * (or POSIX), so we can optimize a few code paths in various places.
+ * For the built-in C and POSIX collations, we can know that without even
+ * doing a cache lookup, but we want to support aliases for C/POSIX too.
+ * For the "default" collation, there are separate static cache variables,
+ * since consulting the pg_collation catalog doesn't tell us what we need.
+ *
+ * Also, if a pg_locale_t has been requested for a collation, we cache that
+ * for the life of a backend.
+ *
+ * Note that some code relies on the flags not reporting false negatives
+ * (that is, saying it's not C when it is). For example, char2wchar()
+ * could fail if the locale is C, so str_tolower() shouldn't call it
+ * in that case.
+ *
+ * Note that we currently lack any way to flush the cache. Since we don't
+ * support ALTER COLLATION, this is OK. The worst case is that someone
+ * drops a collation, and a useless cache entry hangs around in existing
+ * backends.
+ *
+ * lookup_collation_cache() finds or creates the cache entry for a collation
+ * OID, which must be valid and must not be the default collation. The hash
+ * table itself is created on first use. A newly created entry starts with
+ * flags_valid = false and locale = 0. If set_flags is true and the entry's
+ * flags are not yet valid, they are filled in from the collation's
+ * pg_collation row; an error is raised if that row cannot be found, in
+ * which case the entry is left with flags_valid still false. This function
+ * only ever initializes locale to 0; it does not create a locale_t.
+ */
+
+static collation_cache_entry *
+lookup_collation_cache(Oid collation, bool set_flags)
{
- Oid collid;
- locale_t locale;
-};
+ collation_cache_entry *cache_entry;
+ bool found;
-static void
-init_locale_t_cache(void)
+ Assert(OidIsValid(collation));
+ Assert(collation != DEFAULT_COLLATION_OID);
+
+ if (collation_cache == NULL)
+ {
+ /* First time through, initialize the hash table */
+ HASHCTL ctl;
+
+ memset(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(collation_cache_entry);
+ ctl.hash = oid_hash;
+ collation_cache = hash_create("Collation cache", 100, &ctl,
+ HASH_ELEM | HASH_FUNCTION);
+ }
+
+ cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
+ if (!found)
+ {
+ /*
+ * Make sure cache entry is marked invalid, in case we fail before
+ * setting things.
+ */
+ cache_entry->flags_valid = false;
+ cache_entry->locale = 0;
+ }
+
+ if (set_flags && !cache_entry->flags_valid)
+ {
+ /* Attempt to set the flags */
+ HeapTuple tp;
+ Form_pg_collation collform;
+ const char *collcollate;
+ const char *collctype;
+
+ tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for collation %u", collation);
+ collform = (Form_pg_collation) GETSTRUCT(tp);
+
+ collcollate = NameStr(collform->collcollate);
+ collctype = NameStr(collform->collctype);
+
+ cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
+ (strcmp(collcollate, "POSIX") == 0));
+ cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
+ (strcmp(collctype, "POSIX") == 0));
+
+ cache_entry->flags_valid = true;
+
+ ReleaseSysCache(tp);
+ }
+
+ return cache_entry;
+}
+
+
+/*
+ * Detect whether collation's LC_COLLATE property is C
+ *
+ * Returns true if the collation's LC_COLLATE is "C" or "POSIX", else false.
+ * An invalid (zero) OID yields false. For the default collation the answer
+ * comes from setlocale(LC_COLLATE, NULL) and is remembered in a static
+ * variable; an error is raised if setlocale() returns NULL. The built-in
+ * C and POSIX collation OIDs yield true without any lookup. Any other OID
+ * is resolved through lookup_collation_cache().
+ */
+bool
+lc_collate_is_c(Oid collation)
{
- HASHCTL ctl;
+ /*
+ * If we're asked about "collation 0", return false, so that the code
+ * will go into the non-C path and report that the collation is bogus.
+ */
+ if (!OidIsValid(collation))
+ return false;
+
+ /*
+ * If we're asked about the default collation, we have to inquire of
+ * the C library. Cache the result so we only have to compute it once.
+ */
+ if (collation == DEFAULT_COLLATION_OID)
+ {
+ static int result = -1;
+ char *localeptr;
+
+ if (result >= 0)
+ return (bool) result;
+ localeptr = setlocale(LC_COLLATE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_COLLATE setting");
+
+ if (strcmp(localeptr, "C") == 0)
+ result = true;
+ else if (strcmp(localeptr, "POSIX") == 0)
+ result = true;
+ else
+ result = false;
+ return (bool) result;
+ }
+
+ /*
+ * If we're asked about the built-in C/POSIX collations, we know that.
+ */
+ if (collation == C_COLLATION_OID ||
+ collation == POSIX_COLLATION_OID)
+ return true;
+
+ /*
+ * Otherwise, we have to consult pg_collation, but we cache that.
+ */
+ return (lookup_collation_cache(collation, true))->collate_is_c;
+}
+
+/*
+ * Detect whether collation's LC_CTYPE property is C
+ *
+ * Returns true if the collation's LC_CTYPE is "C" or "POSIX", else false.
+ * An invalid (zero) OID yields false. For the default collation the answer
+ * comes from setlocale(LC_CTYPE, NULL) and is remembered in a static
+ * variable; an error is raised if setlocale() returns NULL. The built-in
+ * C and POSIX collation OIDs yield true without any lookup. Any other OID
+ * is resolved through lookup_collation_cache().
+ */
+bool
+lc_ctype_is_c(Oid collation)
+{
+ /*
+ * If we're asked about "collation 0", return false, so that the code
+ * will go into the non-C path and report that the collation is bogus.
+ */
+ if (!OidIsValid(collation))
+ return false;
+
+ /*
+ * If we're asked about the default collation, we have to inquire of
+ * the C library. Cache the result so we only have to compute it once.
+ */
+ if (collation == DEFAULT_COLLATION_OID)
+ {
+ static int result = -1;
+ char *localeptr;
+
+ if (result >= 0)
+ return (bool) result;
+ localeptr = setlocale(LC_CTYPE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_CTYPE setting");
+
+ if (strcmp(localeptr, "C") == 0)
+ result = true;
+ else if (strcmp(localeptr, "POSIX") == 0)
+ result = true;
+ else
+ result = false;
+ return (bool) result;
+ }
+
+ /*
+ * If we're asked about the built-in C/POSIX collations, we know that.
+ */
+ if (collation == C_COLLATION_OID ||
+ collation == POSIX_COLLATION_OID)
+ return true;
- memset(&ctl, 0, sizeof(ctl));
- ctl.keysize = sizeof(Oid);
- ctl.entrysize = sizeof(struct locale_t_cache_entry);
- ctl.hash = oid_hash;
- locale_t_cache = hash_create("locale_t cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION);
+ /*
+ * Otherwise, we have to consult pg_collation, but we cache that.
+ */
+ return (lookup_collation_cache(collation, true))->ctype_is_c;
}
-#endif /* HAVE_LOCALE_T */
+
/*
* Create a locale_t from a collation OID. Results are cached for the
- * lifetime of the backend. Thus, do not free the result with
- * freelocale().
+ * lifetime of the backend. Thus, do not free the result with freelocale().
*
- * As a special optimization, the default/database collation returns
- * 0. Callers should then revert to the non-locale_t-enabled code
- * path. In fact, they shouldn't call this function at all when they
- * are dealing with the default locale. That can save quite a bit in
- * hotspots.
+ * As a special optimization, the default/database collation returns 0.
+ * Callers should then revert to the non-locale_t-enabled code path.
+ * In fact, they shouldn't call this function at all when they are dealing
+ * with the default locale. That can save quite a bit in hotspots.
+ * Also, callers should avoid calling this before going down a C/POSIX
+ * fastpath, because such a fastpath should work even on platforms without
+ * locale_t support in the C library.
*
* For simplicity, we always generate COLLATE + CTYPE even though we
- * might only need one of them. Since this is called only once per
- * session, it shouldn't cost much.
+ * might only need one of them. Since the locale_t is built only once per
+ * collation per session (and cached thereafter), it shouldn't cost much.
*/
pg_locale_t
pg_newlocale_from_collation(Oid collid)
{
-#ifdef HAVE_LOCALE_T
- HeapTuple tp;
- const char *collcollate;
- const char *collctype;
- locale_t result;
- struct locale_t_cache_entry *cache_entry;
- bool found;
+ collation_cache_entry *cache_entry;
+ /* Return 0 for "default" collation, just in case caller forgets */
if (collid == DEFAULT_COLLATION_OID)
- return (locale_t) 0;
+ return (pg_locale_t) 0;
+ /*
+ * This is where we'll fail if a collation-aware function is invoked
+ * and no collation OID is passed. This typically means that the
+ * parser could not resolve a conflict of implicit collations, so
+ * report it that way.
+ */
if (!OidIsValid(collid))
- elog(ERROR, "locale operation to be invoked, but no collation was derived");
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("locale operation to be invoked, but no collation was derived")));
- if (!locale_t_cache)
- init_locale_t_cache();
+ cache_entry = lookup_collation_cache(collid, false);
- cache_entry = hash_search(locale_t_cache, &collid, HASH_ENTER, &found);
- if (found)
- return cache_entry->locale;
+ if (cache_entry->locale == 0)
+ {
+ /* We haven't computed this yet in this session, so do it */
+#ifdef HAVE_LOCALE_T
+ HeapTuple tp;
+ Form_pg_collation collform;
+ const char *collcollate;
+ const char *collctype;
+ locale_t result;
- tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
- if (!HeapTupleIsValid(tp))
- elog(ERROR, "cache lookup failed for collation %u", collid);
+ tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for collation %u", collid);
+ collform = (Form_pg_collation) GETSTRUCT(tp);
- collcollate = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate);
- collctype = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype);
+ collcollate = NameStr(collform->collcollate);
+ collctype = NameStr(collform->collctype);
- if (strcmp(collcollate, collctype) == 0)
- {
- result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, NULL);
- if (!result)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not create locale \"%s\": %m", collcollate)));
- }
- else
- {
- locale_t loc1;
-
- loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
- if (!loc1)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not create locale \"%s\": %m", collcollate)));
- result = newlocale(LC_CTYPE_MASK, collctype, loc1);
- if (!result)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not create locale \"%s\": %m", collctype)));
- }
+ if (strcmp(collcollate, collctype) == 0)
+ {
+ /* Normal case where they're the same */
+ result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
+ NULL);
+ if (!result)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create locale \"%s\": %m",
+ collcollate)));
+ }
+ else
+ {
+ /* We need two newlocale() steps */
+ locale_t loc1;
+
+ loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
+ if (!loc1)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create locale \"%s\": %m",
+ collcollate)));
+ result = newlocale(LC_CTYPE_MASK, collctype, loc1);
+ if (!result)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create locale \"%s\": %m",
+ collctype)));
+ }
- ReleaseSysCache(tp);
+ cache_entry->locale = result;
- cache_entry->locale = result;
+ ReleaseSysCache(tp);
- return result;
#else /* not HAVE_LOCALE_T */
- /*
- * For platforms that don't support locale_t, check that we are
- * dealing with the default locale. It's unlikely that we'll get
- * here, but it's possible if users are creating collations even
- * though they are not supported, or they are mixing builds in odd
- * ways.
- */
- if (!OidIsValid(collid))
- elog(ERROR, "locale operation to be invoked, but no collation was derived");
- else if (collid != DEFAULT_COLLATION_OID)
+
+ /*
+ * For platforms that don't support locale_t, we can't do anything
+ * with non-default collations.
+ */
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("nondefault collations are not supported on this platform")));
-
- return 0;
#endif /* not HAVE_LOCALE_T */
+ }
+
+ return cache_entry->locale;
}
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index d509b1311d1..98e864d5495 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1616,17 +1616,21 @@ setup_collation(void)
*/
skip = false;
for (i = 0; i < len; i++)
+ {
if (IS_HIGHBIT_SET(localebuf[i]))
{
- if (debug)
- fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"),
- progname, localebuf);
- skipped++;
skip = true;
break;
}
+ }
if (skip)
+ {
+ if (debug)
+ fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"),
+ progname, localebuf);
+ skipped++;
continue;
+ }
enc = pg_get_encoding_from_locale(localebuf, debug);
if (enc < 0)
@@ -1635,7 +1639,7 @@ setup_collation(void)
continue; /* error message printed by pg_get_encoding_from_locale() */
}
if (enc == PG_SQL_ASCII)
- continue; /* SQL_ASCII is handled separately */
+ continue; /* C/POSIX are already in the catalog */
PG_CMD_PRINTF2("INSERT INTO tmp_pg_collation (locale, encoding) VALUES ('%s', %d);",
escape_quotes(localebuf), enc);
@@ -1651,10 +1655,6 @@ setup_collation(void)
escape_quotes(alias), escape_quotes(localebuf), enc);
}
- for (i = PG_SQL_ASCII; i <= PG_ENCODING_BE_LAST; i++)
- PG_CMD_PRINTF2("INSERT INTO tmp_pg_collation (locale, encoding) VALUES ('C', %d), ('POSIX', %d);",
- i, i);
-
/* Add an SQL-standard name */
PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation (collname, locale, encoding) VALUES ('ucs_basic', 'C', %d);", PG_UTF8);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index c10de537bec..e9659092198 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201103191
+#define CATALOG_VERSION_NO 201103201
#endif
diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h
index e90aa050f54..2ab0c504f65 100644
--- a/src/include/catalog/pg_collation.h
+++ b/src/include/catalog/pg_collation.h
@@ -58,8 +58,19 @@ typedef FormData_pg_collation *Form_pg_collation;
#define Anum_pg_collation_collcollate 5
#define Anum_pg_collation_collctype 6
-DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" ));
+/* ----------------
+ * initial contents of pg_collation
+ * ----------------
+ */
+
+DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" ));
DESCR("database's default collation");
-#define DEFAULT_COLLATION_OID 100
+#define DEFAULT_COLLATION_OID 100
+DATA(insert OID = 950 ( C PGNSP PGUID -1 "C" "C" ));
+DESCR("standard C collation");
+#define C_COLLATION_OID 950
+DATA(insert OID = 951 ( POSIX PGNSP PGUID -1 "POSIX" "POSIX" ));
+DESCR("standard POSIX collation");
+#define POSIX_COLLATION_OID 951
#endif /* PG_COLLATION_H */
diff --git a/src/include/port.h b/src/include/port.h
index 9d08b392ce1..1116a92fd1f 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -155,6 +155,8 @@ extern int pg_strcasecmp(const char *s1, const char *s2);
extern int pg_strncasecmp(const char *s1, const char *s2, size_t n);
extern unsigned char pg_toupper(unsigned char ch);
extern unsigned char pg_tolower(unsigned char ch);
+extern unsigned char pg_ascii_toupper(unsigned char ch);
+extern unsigned char pg_ascii_tolower(unsigned char ch);
#ifdef USE_REPL_SNPRINTF
diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c
index 1680124df0d..f6e226f0f2c 100644
--- a/src/port/pgstrcasecmp.c
+++ b/src/port/pgstrcasecmp.c
@@ -13,6 +13,10 @@
*
* NB: this code should match downcase_truncate_identifier() in scansup.c.
*
+ * We also provide strict ASCII-only case conversion functions, which can
+ * be used to implement C/POSIX case folding semantics no matter what the
+ * C library thinks the locale is.
+ *
*
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
*
@@ -123,3 +127,25 @@ pg_tolower(unsigned char ch)
ch = tolower(ch);
return ch;
}
+
+/*
+ * Fold a character to upper case, following C/POSIX locale rules.
+ *
+ * Only byte values in the range 'a'..'z' are changed; every other value,
+ * including anything with the high bit set, is returned as-is. No <ctype.h>
+ * function is called, so the result does not depend on the C library's
+ * current locale.
+ */
+unsigned char
+pg_ascii_toupper(unsigned char ch)
+{
+ if (ch >= 'a' && ch <= 'z')
+ ch += 'A' - 'a';
+ return ch;
+}
+
+/*
+ * Fold a character to lower case, following C/POSIX locale rules.
+ *
+ * Only byte values in the range 'A'..'Z' are changed; every other value,
+ * including anything with the high bit set, is returned as-is. No <ctype.h>
+ * function is called, so the result does not depend on the C library's
+ * current locale.
+ */
+unsigned char
+pg_ascii_tolower(unsigned char ch)
+{
+ if (ch >= 'A' && ch <= 'Z')
+ ch += 'a' - 'A';
+ return ch;
+}