Fix up handling of C/POSIX collations.

Install just one instance of the "C" and "POSIX" collations into pg_collation, rather than one per encoding. Make these instances exist and do something useful even on machines without locale_t support: to wit, it's now possible to force comparisons and case-folding functions to use C locale in an otherwise non-C database, whether or not the platform has support for using any additional collations. Fix up severely broken upper/lower/initcap functions, too: the C/POSIX fastpath now does what it is supposed to, and non-default collations are handled correctly in single-byte database encodings. Merge the two separate collation hashtables that were being maintained in pg_locale.c, and be more wary of the possibility that we fail partway through filling a cache entry.
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-03-20 12:43:39 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-03-20 12:44:13 -0400
commit: 176d5bae1d636fc1e91840b12cbd04c96d638b7e (patch)
tree: f861d3f9d9eb2bead0cd932e7825271fb1fbc1e1 /src
parent: c2f4ea469b52e6f7fedff651a4aa0acced873a5f (diff)
download: postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.tar.gz postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.zip
7 files changed, 461 insertions, 253 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index aba11459bb1..54783103a2c 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1462,10 +1462,16 @@ str_numth(char *dest, char *num, int type)
  * in multibyte character sets.  Note that in either case we are effectively
  * assuming that the database character encoding matches the encoding implied
  * by LC_CTYPE.
+ *
+ * If the system provides locale_t and associated functions (which are
+ * standardized by Open Group's XBD), we can support collations that are
+ * neither default nor C.  The code is written to handle both combinations
+ * of have-wide-characters and have-locale_t, though it's rather unlikely
+ * a platform would have the latter without the former.
  */
 
 /*
- * wide-character-aware lower function
+ * collation-aware, wide-character-aware lower function
  *
  * We pass the number of bytes so we can pass varlena and char*
  * to this function.  The result is a palloc'd, null-terminated string.
@@ -1474,21 +1480,31 @@ char *
 str_tolower(const char *buff, size_t nbytes, Oid collid)
 {
 	char	   *result;
-	pg_locale_t	mylocale = 0;
 
 	if (!buff)
 		return NULL;
 
-	if (collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	/* C/POSIX collations use this path regardless of database encoding */
+	if (lc_ctype_is_c(collid))
+	{
+		char	   *p;
+
+		result = pnstrdup(buff, nbytes);
 
+		for (p = result; *p; p++)
+			*p = pg_ascii_tolower((unsigned char) *p);
+	}
 #ifdef USE_WIDE_UPPER_LOWER
-	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+	else if (pg_database_encoding_max_length() > 1)
 	{
+		pg_locale_t	mylocale = 0;
 		wchar_t    *workspace;
 		size_t		curr_char;
 		size_t		result_size;
 
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+
 		/* Overflow paranoia */
 		if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
 			ereport(ERROR,
@@ -1501,12 +1517,14 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
 
 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+		{
 #ifdef HAVE_LOCALE_T
 			if (mylocale)
 				workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
 			else
 #endif
-			workspace[curr_char] = towlower(workspace[curr_char]);
+				workspace[curr_char] = towlower(workspace[curr_char]);
+		}
 
 		/* Make result large enough; case change might change number of bytes */
 		result_size = curr_char * pg_database_encoding_max_length() + 1;
@@ -1515,22 +1533,40 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		wchar2char(result, workspace, result_size, collid);
 		pfree(workspace);
 	}
-	else
 #endif   /* USE_WIDE_UPPER_LOWER */
+	else
 	{
+		pg_locale_t	mylocale = 0;
 		char	   *p;
 
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+
 		result = pnstrdup(buff, nbytes);
 
+		/*
+		 * Note: we assume that tolower_l() will not be so broken as to need
+		 * an isupper_l() guard test.  When using the default collation, we
+		 * apply the traditional Postgres behavior that forces ASCII-style
+		 * treatment of I/i, but in non-default collations you get exactly
+		 * what the collation says.
+		 */
 		for (p = result; *p; p++)
-			*p = pg_tolower((unsigned char) *p);
+		{
+#ifdef HAVE_LOCALE_T
+			if (mylocale)
+				*p = tolower_l((unsigned char) *p, mylocale);
+			else
+#endif
+				*p = pg_tolower((unsigned char) *p);
+		}
 	}
 
 	return result;
 }
 
 /*
- * wide-character-aware upper function
+ * collation-aware, wide-character-aware upper function
  *
  * We pass the number of bytes so we can pass varlena and char*
  * to this function.  The result is a palloc'd, null-terminated string.
@@ -1539,21 +1575,31 @@ char *
 str_toupper(const char *buff, size_t nbytes, Oid collid)
 {
 	char	   *result;
-	pg_locale_t	mylocale = 0;
 
 	if (!buff)
 		return NULL;
 
-	if (collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	/* C/POSIX collations use this path regardless of database encoding */
+	if (lc_ctype_is_c(collid))
+	{
+		char	   *p;
 
+		result = pnstrdup(buff, nbytes);
+
+		for (p = result; *p; p++)
+			*p = pg_ascii_toupper((unsigned char) *p);
+	}
 #ifdef USE_WIDE_UPPER_LOWER
-	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+	else if (pg_database_encoding_max_length() > 1)
 	{
+		pg_locale_t	mylocale = 0;
 		wchar_t    *workspace;
 		size_t		curr_char;
 		size_t		result_size;
 
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+
 		/* Overflow paranoia */
 		if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
 			ereport(ERROR,
@@ -1566,12 +1612,14 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
 
 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+		{
 #ifdef HAVE_LOCALE_T
 			if (mylocale)
 				workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
 			else
 #endif
-			workspace[curr_char] = towupper(workspace[curr_char]);
+				workspace[curr_char] = towupper(workspace[curr_char]);
+		}
 
 		/* Make result large enough; case change might change number of bytes */
 		result_size = curr_char * pg_database_encoding_max_length() + 1;
@@ -1580,22 +1628,40 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		wchar2char(result, workspace, result_size, collid);
 		pfree(workspace);
 	}
-	else
 #endif   /* USE_WIDE_UPPER_LOWER */
+	else
 	{
+		pg_locale_t	mylocale = 0;
 		char	   *p;
 
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+
 		result = pnstrdup(buff, nbytes);
 
+		/*
+		 * Note: we assume that toupper_l() will not be so broken as to need
+		 * an islower_l() guard test.  When using the default collation, we
+		 * apply the traditional Postgres behavior that forces ASCII-style
+		 * treatment of I/i, but in non-default collations you get exactly
+		 * what the collation says.
+		 */
 		for (p = result; *p; p++)
-			*p = pg_toupper((unsigned char) *p);
+		{
+#ifdef HAVE_LOCALE_T
+			if (mylocale)
+				*p = toupper_l((unsigned char) *p, mylocale);
+			else
+#endif
+				*p = pg_toupper((unsigned char) *p);
+		}
 	}
 
 	return result;
 }
 
 /*
- * wide-character-aware initcap function
+ * collation-aware, wide-character-aware initcap function
  *
  * We pass the number of bytes so we can pass varlena and char*
  * to this function.  The result is a palloc'd, null-terminated string.
@@ -1605,21 +1671,42 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 {
 	char	   *result;
 	int			wasalnum = false;
-	pg_locale_t	mylocale = 0;
 
 	if (!buff)
 		return NULL;
 
-	if (collid != DEFAULT_COLLATION_OID)
-		mylocale = pg_newlocale_from_collation(collid);
+	/* C/POSIX collations use this path regardless of database encoding */
+	if (lc_ctype_is_c(collid))
+	{
+		char	   *p;
+
+		result = pnstrdup(buff, nbytes);
 
+		for (p = result; *p; p++)
+		{
+			char	c;
+
+			if (wasalnum)
+				*p = c = pg_ascii_tolower((unsigned char) *p);
+			else
+				*p = c = pg_ascii_toupper((unsigned char) *p);
+			/* we don't trust isalnum() here */
+			wasalnum = ((c >= 'A' && c <= 'Z') ||
+						(c >= 'a' && c <= 'z') ||
+						(c >= '0' && c <= '9'));
+		}
+	}
 #ifdef USE_WIDE_UPPER_LOWER
-	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+	else if (pg_database_encoding_max_length() > 1)
 	{
+		pg_locale_t	mylocale = 0;
 		wchar_t    *workspace;
 		size_t		curr_char;
 		size_t		result_size;
 
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+
 		/* Overflow paranoia */
 		if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
 			ereport(ERROR,
@@ -1660,20 +1747,44 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 		wchar2char(result, workspace, result_size, collid);
 		pfree(workspace);
 	}
-	else
 #endif   /* USE_WIDE_UPPER_LOWER */
+	else
 	{
+		pg_locale_t	mylocale = 0;
 		char	   *p;
 
+		if (collid != DEFAULT_COLLATION_OID)
+			mylocale = pg_newlocale_from_collation(collid);
+
 		result = pnstrdup(buff, nbytes);
 
+		/*
+		 * Note: we assume that toupper_l()/tolower_l() will not be so broken
+		 * as to need guard tests.  When using the default collation, we apply
+		 * the traditional Postgres behavior that forces ASCII-style treatment
+		 * of I/i, but in non-default collations you get exactly what the
+		 * collation says.
+		 */
 		for (p = result; *p; p++)
 		{
-			if (wasalnum)
-				*p = pg_tolower((unsigned char) *p);
+#ifdef HAVE_LOCALE_T
+			if (mylocale)
+			{
+				if (wasalnum)
+					*p = tolower_l((unsigned char) *p, mylocale);
+				else
+					*p = toupper_l((unsigned char) *p, mylocale);
+				wasalnum = isalnum_l((unsigned char) *p, mylocale);
+			}
 			else
-				*p = pg_toupper((unsigned char) *p);
-			wasalnum = isalnum((unsigned char) *p);
+#endif
+			{
+				if (wasalnum)
+					*p = pg_tolower((unsigned char) *p);
+				else
+					*p = pg_toupper((unsigned char) *p);
+				wasalnum = isalnum((unsigned char) *p);
+			}
 		}
 	}
 
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 2b9b321b263..15d347c4f89 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -99,15 +99,24 @@ static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
 static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
 static char lc_time_envbuf[LC_ENV_BUFSIZE];
 
+/* Cache for collation-related knowledge */
+
+typedef struct
+{
+	Oid			collid;			/* hash key: pg_collation OID */
+	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
+	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
+	bool		flags_valid;	/* true if above flags are valid */
+	pg_locale_t	locale;			/* locale_t struct, or 0 if not valid */
+} collation_cache_entry;
+
+static HTAB *collation_cache = NULL;
+
+
 #if defined(WIN32) && defined(LC_MESSAGES)
 static char *IsoLocaleName(const char *);		/* MSVC specific */
 #endif
 
-static HTAB *locale_cness_cache = NULL;
-#ifdef HAVE_LOCALE_T
-static HTAB *locale_t_cache = NULL;
-#endif
-
 
 /*
  * pg_perm_setlocale
@@ -313,136 +322,6 @@ locale_messages_assign(const char *value, bool doit, GucSource source)
 
 
 /*
- * We'd like to cache whether LC_COLLATE or LC_CTYPE is C (or POSIX),
- * so we can optimize a few code paths in various places.
- *
- * Note that some code relies on this not reporting false negatives
- * (that is, saying it's not C when it is).  For example, char2wchar()
- * could fail if the locale is C, so str_tolower() shouldn't call it
- * in that case.
- */
-
-struct locale_cness_cache_entry
-{
-	Oid			collid;
-	bool		collate_is_c;
-	bool		ctype_is_c;
-};
-
-static void
-init_locale_cness_cache(void)
-{
-	HASHCTL		ctl;
-
-	memset(&ctl, 0, sizeof(ctl));
-	ctl.keysize = sizeof(Oid);
-	ctl.entrysize = sizeof(struct locale_cness_cache_entry);
-	ctl.hash = oid_hash;
-	locale_cness_cache = hash_create("locale C-ness cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION);
-}
-
-/*
- * Handle caching of locale "C-ness" for nondefault collation objects.
- * Relying on the system cache directly isn't fast enough.
- */
-static bool
-lookup_collation_cness(Oid collation, int category)
-{
-	struct locale_cness_cache_entry *cache_entry;
-	bool		found;
-	HeapTuple	tp;
-	char	   *localeptr;
-
-	Assert(OidIsValid(collation));
-	Assert(category == LC_COLLATE || category == LC_CTYPE);
-
-	if (!locale_cness_cache)
-		init_locale_cness_cache();
-
-	cache_entry = hash_search(locale_cness_cache, &collation, HASH_ENTER, &found);
-	if (found)
-	{
-		if (category == LC_COLLATE)
-			return cache_entry->collate_is_c;
-		else
-			return cache_entry->ctype_is_c;
-	}
-
-	tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
-	if (!HeapTupleIsValid(tp))
-		elog(ERROR, "cache lookup failed for collation %u", collation);
-
-	localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate);
-	cache_entry->collate_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0);
-
-	localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype);
-	cache_entry->ctype_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0);
-
-	ReleaseSysCache(tp);
-
-	return category == LC_COLLATE ? cache_entry->collate_is_c : cache_entry->ctype_is_c;
-}
-
-
-bool
-lc_collate_is_c(Oid collation)
-{
-	/* Cache result so we only have to compute it once */
-	static int	result = -1;
-	char	   *localeptr;
-
-	if (!OidIsValid(collation))
-		return false;
-
-	if (collation != DEFAULT_COLLATION_OID)
-		return lookup_collation_cness(collation, LC_COLLATE);
-
-	if (result >= 0)
-		return (bool) result;
-	localeptr = setlocale(LC_COLLATE, NULL);
-	if (!localeptr)
-		elog(ERROR, "invalid LC_COLLATE setting");
-
-	if (strcmp(localeptr, "C") == 0)
-		result = true;
-	else if (strcmp(localeptr, "POSIX") == 0)
-		result = true;
-	else
-		result = false;
-	return (bool) result;
-}
-
-
-bool
-lc_ctype_is_c(Oid collation)
-{
-	/* Cache result so we only have to compute it once */
-	static int	result = -1;
-	char	   *localeptr;
-
-	if (!OidIsValid(collation))
-		return false;
-
-	if (collation != DEFAULT_COLLATION_OID)
-		return lookup_collation_cness(collation, LC_CTYPE);
-
-	if (result >= 0)
-		return (bool) result;
-	localeptr = setlocale(LC_CTYPE, NULL);
-	if (!localeptr)
-		elog(ERROR, "invalid LC_CTYPE setting");
-
-	if (strcmp(localeptr, "C") == 0)
-		result = true;
-	else if (strcmp(localeptr, "POSIX") == 0)
-		result = true;
-	else
-		result = false;
-	return (bool) result;
-}
-
-
-/*
  * Frees the malloced content of a struct lconv.  (But not the struct
  * itself.)
  */
@@ -844,116 +723,295 @@ IsoLocaleName(const char *winlocname)
 #endif   /* WIN32 && LC_MESSAGES */
 
 
-#ifdef HAVE_LOCALE_T
-struct locale_t_cache_entry
+/*
+ * Cache mechanism for collation information.
+ *
+ * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
+ * (or POSIX), so we can optimize a few code paths in various places.
+ * For the built-in C and POSIX collations, we can know that without even
+ * doing a cache lookup, but we want to support aliases for C/POSIX too.
+ * For the "default" collation, there are separate static cache variables,
+ * since consulting the pg_collation catalog doesn't tell us what we need.
+ *
+ * Also, if a pg_locale_t has been requested for a collation, we cache that
+ * for the life of a backend.
+ *
+ * Note that some code relies on the flags not reporting false negatives
+ * (that is, saying it's not C when it is).  For example, char2wchar()
+ * could fail if the locale is C, so str_tolower() shouldn't call it
+ * in that case.
+ *
+ * Note that we currently lack any way to flush the cache.  Since we don't
+ * support ALTER COLLATION, this is OK.  The worst case is that someone
+ * drops a collation, and a useless cache entry hangs around in existing
+ * backends.
+ */
+
+static collation_cache_entry *
+lookup_collation_cache(Oid collation, bool set_flags)
 {
-	Oid			collid;
-	locale_t	locale;
-};
+	collation_cache_entry *cache_entry;
+	bool		found;
 
-static void
-init_locale_t_cache(void)
+	Assert(OidIsValid(collation));
+	Assert(collation != DEFAULT_COLLATION_OID);
+
+	if (collation_cache == NULL)
+	{
+		/* First time through, initialize the hash table */
+		HASHCTL		ctl;
+
+		memset(&ctl, 0, sizeof(ctl));
+		ctl.keysize = sizeof(Oid);
+		ctl.entrysize = sizeof(collation_cache_entry);
+		ctl.hash = oid_hash;
+		collation_cache = hash_create("Collation cache", 100, &ctl,
+									  HASH_ELEM | HASH_FUNCTION);
+	}
+
+	cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
+	if (!found)
+	{
+		/*
+		 * Make sure cache entry is marked invalid, in case we fail before
+		 * setting things.
+		 */
+		cache_entry->flags_valid = false;
+		cache_entry->locale = 0;
+	}
+
+	if (set_flags && !cache_entry->flags_valid)
+	{
+		/* Attempt to set the flags */
+		HeapTuple	tp;
+		Form_pg_collation collform;
+		const char *collcollate;
+		const char *collctype;
+
+		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
+		if (!HeapTupleIsValid(tp))
+			elog(ERROR, "cache lookup failed for collation %u", collation);
+		collform = (Form_pg_collation) GETSTRUCT(tp);
+
+		collcollate = NameStr(collform->collcollate);
+		collctype = NameStr(collform->collctype);
+
+		cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
+									 (strcmp(collcollate, "POSIX") == 0));
+		cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
+								   (strcmp(collctype, "POSIX") == 0));
+
+		cache_entry->flags_valid = true;
+
+		ReleaseSysCache(tp);
+	}
+
+	return cache_entry;
+}
+
+
+/*
+ * Detect whether collation's LC_COLLATE property is C
+ */
+bool
+lc_collate_is_c(Oid collation)
 {
-	HASHCTL		ctl;
+	/*
+	 * If we're asked about "collation 0", return false, so that the code
+	 * will go into the non-C path and report that the collation is bogus.
+	 */
+	if (!OidIsValid(collation))
+		return false;
+
+	/*
+	 * If we're asked about the default collation, we have to inquire of
+	 * the C library.  Cache the result so we only have to compute it once.
+	 */
+	if (collation == DEFAULT_COLLATION_OID)
+	{
+		static int	result = -1;
+		char	   *localeptr;
+
+		if (result >= 0)
+			return (bool) result;
+		localeptr = setlocale(LC_COLLATE, NULL);
+		if (!localeptr)
+			elog(ERROR, "invalid LC_COLLATE setting");
+
+		if (strcmp(localeptr, "C") == 0)
+			result = true;
+		else if (strcmp(localeptr, "POSIX") == 0)
+			result = true;
+		else
+			result = false;
+		return (bool) result;
+	}
+
+	/*
+	 * If we're asked about the built-in C/POSIX collations, we know that.
+	 */
+	if (collation == C_COLLATION_OID ||
+		collation == POSIX_COLLATION_OID)
+		return true;
+
+	/*
+	 * Otherwise, we have to consult pg_collation, but we cache that.
+	 */
+	return (lookup_collation_cache(collation, true))->collate_is_c;
+}
+
+/*
+ * Detect whether collation's LC_CTYPE property is C
+ */
+bool
+lc_ctype_is_c(Oid collation)
+{
+	/*
+	 * If we're asked about "collation 0", return false, so that the code
+	 * will go into the non-C path and report that the collation is bogus.
+	 */
+	if (!OidIsValid(collation))
+		return false;
+
+	/*
+	 * If we're asked about the default collation, we have to inquire of
+	 * the C library.  Cache the result so we only have to compute it once.
+	 */
+	if (collation == DEFAULT_COLLATION_OID)
+	{
+		static int	result = -1;
+		char	   *localeptr;
+
+		if (result >= 0)
+			return (bool) result;
+		localeptr = setlocale(LC_CTYPE, NULL);
+		if (!localeptr)
+			elog(ERROR, "invalid LC_CTYPE setting");
+
+		if (strcmp(localeptr, "C") == 0)
+			result = true;
+		else if (strcmp(localeptr, "POSIX") == 0)
+			result = true;
+		else
+			result = false;
+		return (bool) result;
+	}
+
+	/*
+	 * If we're asked about the built-in C/POSIX collations, we know that.
+	 */
+	if (collation == C_COLLATION_OID ||
+		collation == POSIX_COLLATION_OID)
+		return true;
 
-	memset(&ctl, 0, sizeof(ctl));
-	ctl.keysize = sizeof(Oid);
-	ctl.entrysize = sizeof(struct locale_t_cache_entry);
-	ctl.hash = oid_hash;
-	locale_t_cache = hash_create("locale_t cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION);
+	/*
+	 * Otherwise, we have to consult pg_collation, but we cache that.
+	 */
+	return (lookup_collation_cache(collation, true))->ctype_is_c;
 }
-#endif /* HAVE_LOCALE_T */
+
 
 /*
  * Create a locale_t from a collation OID.  Results are cached for the
- * lifetime of the backend.  Thus, do not free the result with
- * freelocale().
+ * lifetime of the backend.  Thus, do not free the result with freelocale().
  *
- * As a special optimization, the default/database collation returns
- * 0.  Callers should then revert to the non-locale_t-enabled code
- * path.  In fact, they shouldn't call this function at all when they
- * are dealing with the default locale.  That can save quite a bit in
- * hotspots.
+ * As a special optimization, the default/database collation returns 0.
+ * Callers should then revert to the non-locale_t-enabled code path.
+ * In fact, they shouldn't call this function at all when they are dealing
+ * with the default locale.  That can save quite a bit in hotspots.
+ * Also, callers should avoid calling this before going down a C/POSIX
+ * fastpath, because such a fastpath should work even on platforms without
+ * locale_t support in the C library.
  *
  * For simplicity, we always generate COLLATE + CTYPE even though we
- * might only need one of them.  Since this is called only once per
- * session, it shouldn't cost much.
+ * might only need one of them.  Since this is called only once per session,
+ * it shouldn't cost much.
  */
 pg_locale_t
 pg_newlocale_from_collation(Oid collid)
 {
-#ifdef HAVE_LOCALE_T
-	HeapTuple	tp;
-	const char *collcollate;
-	const char *collctype;
-	locale_t	result;
-	struct locale_t_cache_entry *cache_entry;
-	bool		found;
+	collation_cache_entry *cache_entry;
 
+	/* Return 0 for "default" collation, just in case caller forgets */
 	if (collid == DEFAULT_COLLATION_OID)
-		return (locale_t) 0;
+		return (pg_locale_t) 0;
 
+	/*
+	 * This is where we'll fail if a collation-aware function is invoked
+	 * and no collation OID is passed.  This typically means that the
+	 * parser could not resolve a conflict of implicit collations, so
+	 * report it that way.
+	 */
 	if (!OidIsValid(collid))
-		elog(ERROR, "locale operation to be invoked, but no collation was derived");
+		ereport(ERROR,
+				(errcode(ERRCODE_INDETERMINATE_COLLATION),
+				 errmsg("locale operation to be invoked, but no collation was derived")));
 
-	if (!locale_t_cache)
-		init_locale_t_cache();
+	cache_entry = lookup_collation_cache(collid, false);
 
-	cache_entry = hash_search(locale_t_cache, &collid, HASH_ENTER, &found);
-	if (found)
-		return cache_entry->locale;
+	if (cache_entry->locale == 0)
+	{
+		/* We haven't computed this yet in this session, so do it */
+#ifdef HAVE_LOCALE_T
+		HeapTuple	tp;
+		Form_pg_collation collform;
+		const char *collcollate;
+		const char *collctype;
+		locale_t	result;
 
-	tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
-	if (!HeapTupleIsValid(tp))
-		elog(ERROR, "cache lookup failed for collation %u", collid);
+		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+		if (!HeapTupleIsValid(tp))
+			elog(ERROR, "cache lookup failed for collation %u", collid);
+		collform = (Form_pg_collation) GETSTRUCT(tp);
 
-	collcollate = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate);
-	collctype = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype);
+		collcollate = NameStr(collform->collcollate);
+		collctype = NameStr(collform->collctype);
 
-	if (strcmp(collcollate, collctype) == 0)
-	{
-		result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, NULL);
-		if (!result)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not create locale \"%s\": %m", collcollate)));
-	}
-	else
-	{
-		locale_t loc1;
-
-		loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
-		if (!loc1)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not create locale \"%s\": %m", collcollate)));
-		result = newlocale(LC_CTYPE_MASK, collctype, loc1);
-		if (!result)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not create locale \"%s\": %m", collctype)));
-	}
+		if (strcmp(collcollate, collctype) == 0)
+		{
+			/* Normal case where they're the same */
+			result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
+							   NULL);
+			if (!result)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not create locale \"%s\": %m",
+								collcollate)));
+		}
+		else
+		{
+			/* We need two newlocale() steps */
+			locale_t loc1;
+
+			loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
+			if (!loc1)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not create locale \"%s\": %m",
+								collcollate)));
+			result = newlocale(LC_CTYPE_MASK, collctype, loc1);
+			if (!result)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not create locale \"%s\": %m",
+								collctype)));
+		}
 
-	ReleaseSysCache(tp);
+		cache_entry->locale = result;
 
-	cache_entry->locale = result;
+		ReleaseSysCache(tp);
 
-	return result;
 #else /* not HAVE_LOCALE_T */
-	/*
-	 * For platforms that don't support locale_t, check that we are
-	 * dealing with the default locale.  It's unlikely that we'll get
-	 * here, but it's possible if users are creating collations even
-	 * though they are not supported, or they are mixing builds in odd
-	 * ways.
-	 */
-	if (!OidIsValid(collid))
-		elog(ERROR, "locale operation to be invoked, but no collation was derived");
-	else if (collid != DEFAULT_COLLATION_OID)
+
+		/*
+		 * For platforms that don't support locale_t, we can't do anything
+		 * with non-default collations.
+		 */
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("nondefault collations are not supported on this platform")));
-
-	return 0;
 #endif /* not HAVE_LOCALE_T */
+	}
+
+	return cache_entry->locale;
 }
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index d509b1311d1..98e864d5495 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1616,17 +1616,21 @@ setup_collation(void)
 		 */
 		skip = false;
 		for (i = 0; i < len; i++)
+		{
 			if (IS_HIGHBIT_SET(localebuf[i]))
 			{
-				if (debug)
-					fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"),
-							progname, localebuf);
-				skipped++;
 				skip = true;
 				break;
 			}
+		}
 		if (skip)
+		{
+			if (debug)
+				fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"),
+						progname, localebuf);
+			skipped++;
 			continue;
+		}
 
 		enc = pg_get_encoding_from_locale(localebuf, debug);
 		if (enc < 0)
@@ -1635,7 +1639,7 @@ setup_collation(void)
 			continue;			/* error message printed by pg_get_encoding_from_locale() */
 		}
 		if (enc == PG_SQL_ASCII)
-			continue;			/* SQL_ASCII is handled separately */
+			continue;			/* C/POSIX are already in the catalog */
 
 		PG_CMD_PRINTF2("INSERT INTO tmp_pg_collation (locale, encoding) VALUES ('%s', %d);",
 					   escape_quotes(localebuf), enc);
@@ -1651,10 +1655,6 @@ setup_collation(void)
 						   escape_quotes(alias), escape_quotes(localebuf), enc);
 	}
 
-	for (i = PG_SQL_ASCII; i <= PG_ENCODING_BE_LAST; i++)
-		PG_CMD_PRINTF2("INSERT INTO tmp_pg_collation (locale, encoding) VALUES ('C', %d), ('POSIX', %d);",
-					   i, i);
-
 	/* Add an SQL-standard name */
 	PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation (collname, locale, encoding) VALUES ('ucs_basic', 'C', %d);", PG_UTF8);
 
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index c10de537bec..e9659092198 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201103191
+#define CATALOG_VERSION_NO	201103201
 
 #endif
diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h
index e90aa050f54..2ab0c504f65 100644
--- a/src/include/catalog/pg_collation.h
+++ b/src/include/catalog/pg_collation.h
@@ -58,8 +58,19 @@ typedef FormData_pg_collation *Form_pg_collation;
 #define Anum_pg_collation_collcollate	5
 #define Anum_pg_collation_collctype		6
 
-DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" ));
+/* ----------------
+ *		initial contents of pg_collation
+ * ----------------
+ */
+
+DATA(insert OID = 100 ( default		PGNSP PGUID -1 "" "" ));
 DESCR("database's default collation");
-#define DEFAULT_COLLATION_OID			100
+#define DEFAULT_COLLATION_OID	100
+DATA(insert OID = 950 ( C			PGNSP PGUID -1 "C" "C" ));
+DESCR("standard C collation");
+#define C_COLLATION_OID			950
+DATA(insert OID = 951 ( POSIX		PGNSP PGUID -1 "POSIX" "POSIX" ));
+DESCR("standard POSIX collation");
+#define POSIX_COLLATION_OID		951
 
 #endif   /* PG_COLLATION_H */
diff --git a/src/include/port.h b/src/include/port.h
index 9d08b392ce1..1116a92fd1f 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -155,6 +155,8 @@ extern int	pg_strcasecmp(const char *s1, const char *s2);
 extern int	pg_strncasecmp(const char *s1, const char *s2, size_t n);
 extern unsigned char pg_toupper(unsigned char ch);
 extern unsigned char pg_tolower(unsigned char ch);
+extern unsigned char pg_ascii_toupper(unsigned char ch);
+extern unsigned char pg_ascii_tolower(unsigned char ch);
 
 #ifdef USE_REPL_SNPRINTF
 
diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c
index 1680124df0d..f6e226f0f2c 100644
--- a/src/port/pgstrcasecmp.c
+++ b/src/port/pgstrcasecmp.c
@@ -13,6 +13,10 @@
  *
  * NB: this code should match downcase_truncate_identifier() in scansup.c.
  *
+ * We also provide strict ASCII-only case conversion functions, which can
+ * be used to implement C/POSIX case folding semantics no matter what the
+ * C library thinks the locale is.
+ *
  *
  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
  *
@@ -123,3 +127,25 @@ pg_tolower(unsigned char ch)
 		ch = tolower(ch);
 	return ch;
 }
+
+/*
+ * Fold a character to upper case, following C/POSIX locale rules.
+ */
+unsigned char
+pg_ascii_toupper(unsigned char ch)
+{
+	if (ch >= 'a' && ch <= 'z')
+		ch += 'A' - 'a';
+	return ch;
+}
+
+/*
+ * Fold a character to lower case, following C/POSIX locale rules.
+ */
+unsigned char
+pg_ascii_tolower(unsigned char ch)
+{
+	if (ch >= 'A' && ch <= 'Z')
+		ch += 'a' - 'A';
+	return ch;
+}
author	Tom Lane <tgl@sss.pgh.pa.us>	2011-03-20 12:43:39 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2011-03-20 12:44:13 -0400
commit	176d5bae1d636fc1e91840b12cbd04c96d638b7e (patch)
tree	f861d3f9d9eb2bead0cd932e7825271fb1fbc1e1 /src
parent	c2f4ea469b52e6f7fedff651a4aa0acced873a5f (diff)
download	postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.tar.gz postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.zip