aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/formatting.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2011-03-20 12:43:39 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2011-03-20 12:44:13 -0400
commit176d5bae1d636fc1e91840b12cbd04c96d638b7e (patch)
treef861d3f9d9eb2bead0cd932e7825271fb1fbc1e1 /src/backend/utils/adt/formatting.c
parentc2f4ea469b52e6f7fedff651a4aa0acced873a5f (diff)
downloadpostgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.tar.gz
postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.zip
Fix up handling of C/POSIX collations.
Install just one instance of the "C" and "POSIX" collations into pg_collation, rather than one per encoding. Make these instances exist and do something useful even in machines without locale_t support: to wit, it's now possible to force comparisons and case-folding functions to use C locale in an otherwise non-C database, whether or not the platform has support for using any additional collations. Fix up severely broken upper/lower/initcap functions, too: the C/POSIX fastpath now does what it is supposed to, and non-default collations are handled correctly in single-byte database encodings. Merge the two separate collation hashtables that were being maintained in pg_locale.c, and be more wary of the possibility that we fail partway through filling a cache entry.
Diffstat (limited to 'src/backend/utils/adt/formatting.c')
-rw-r--r--src/backend/utils/adt/formatting.c163
1 files changed, 137 insertions, 26 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index aba11459bb1..54783103a2c 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1462,10 +1462,16 @@ str_numth(char *dest, char *num, int type)
* in multibyte character sets. Note that in either case we are effectively
* assuming that the database character encoding matches the encoding implied
* by LC_CTYPE.
+ *
+ * If the system provides locale_t and associated functions (which are
+ * standardized by Open Group's XBD), we can support collations that are
+ * neither default nor C. The code is written to handle both combinations
+ * of have-wide-characters and have-locale_t, though it's rather unlikely
+ * a platform would have the latter without the former.
*/
/*
- * wide-character-aware lower function
+ * collation-aware, wide-character-aware lower function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
@@ -1474,21 +1480,31 @@ char *
str_tolower(const char *buff, size_t nbytes, Oid collid)
{
char *result;
- pg_locale_t mylocale = 0;
if (!buff)
return NULL;
- if (collid != DEFAULT_COLLATION_OID)
- mylocale = pg_newlocale_from_collation(collid);
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+ for (p = result; *p; p++)
+ *p = pg_ascii_tolower((unsigned char) *p);
+ }
#ifdef USE_WIDE_UPPER_LOWER
- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+ else if (pg_database_encoding_max_length() > 1)
{
+ pg_locale_t mylocale = 0;
wchar_t *workspace;
size_t curr_char;
size_t result_size;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
@@ -1501,12 +1517,14 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
else
#endif
- workspace[curr_char] = towlower(workspace[curr_char]);
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ }
/* Make result large enough; case change might change number of bytes */
result_size = curr_char * pg_database_encoding_max_length() + 1;
@@ -1515,22 +1533,40 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
wchar2char(result, workspace, result_size, collid);
pfree(workspace);
}
- else
#endif /* USE_WIDE_UPPER_LOWER */
+ else
{
+ pg_locale_t mylocale = 0;
char *p;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
result = pnstrdup(buff, nbytes);
+ /*
+ * Note: we assume that tolower_l() will not be so broken as to need
+ * an isupper_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
for (p = result; *p; p++)
- *p = pg_tolower((unsigned char) *p);
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ *p = tolower_l((unsigned char) *p, mylocale);
+ else
+#endif
+ *p = pg_tolower((unsigned char) *p);
+ }
}
return result;
}
/*
- * wide-character-aware upper function
+ * collation-aware, wide-character-aware upper function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
@@ -1539,21 +1575,31 @@ char *
str_toupper(const char *buff, size_t nbytes, Oid collid)
{
char *result;
- pg_locale_t mylocale = 0;
if (!buff)
return NULL;
- if (collid != DEFAULT_COLLATION_OID)
- mylocale = pg_newlocale_from_collation(collid);
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ char *p;
+ result = pnstrdup(buff, nbytes);
+
+ for (p = result; *p; p++)
+ *p = pg_ascii_toupper((unsigned char) *p);
+ }
#ifdef USE_WIDE_UPPER_LOWER
- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+ else if (pg_database_encoding_max_length() > 1)
{
+ pg_locale_t mylocale = 0;
wchar_t *workspace;
size_t curr_char;
size_t result_size;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
@@ -1566,12 +1612,14 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
else
#endif
- workspace[curr_char] = towupper(workspace[curr_char]);
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ }
/* Make result large enough; case change might change number of bytes */
result_size = curr_char * pg_database_encoding_max_length() + 1;
@@ -1580,22 +1628,40 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
wchar2char(result, workspace, result_size, collid);
pfree(workspace);
}
- else
#endif /* USE_WIDE_UPPER_LOWER */
+ else
{
+ pg_locale_t mylocale = 0;
char *p;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
result = pnstrdup(buff, nbytes);
+ /*
+ * Note: we assume that toupper_l() will not be so broken as to need
+ * an islower_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
for (p = result; *p; p++)
- *p = pg_toupper((unsigned char) *p);
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ *p = toupper_l((unsigned char) *p, mylocale);
+ else
+#endif
+ *p = pg_toupper((unsigned char) *p);
+ }
}
return result;
}
/*
- * wide-character-aware initcap function
+ * collation-aware, wide-character-aware initcap function
*
* We pass the number of bytes so we can pass varlena and char*
* to this function. The result is a palloc'd, null-terminated string.
@@ -1605,21 +1671,42 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
{
char *result;
int wasalnum = false;
- pg_locale_t mylocale = 0;
if (!buff)
return NULL;
- if (collid != DEFAULT_COLLATION_OID)
- mylocale = pg_newlocale_from_collation(collid);
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+ for (p = result; *p; p++)
+ {
+ char c;
+
+ if (wasalnum)
+ *p = c = pg_ascii_tolower((unsigned char) *p);
+ else
+ *p = c = pg_ascii_toupper((unsigned char) *p);
+ /* we don't trust isalnum() here */
+ wasalnum = ((c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '9'));
+ }
+ }
#ifdef USE_WIDE_UPPER_LOWER
- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid))
+ else if (pg_database_encoding_max_length() > 1)
{
+ pg_locale_t mylocale = 0;
wchar_t *workspace;
size_t curr_char;
size_t result_size;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
@@ -1660,20 +1747,44 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
wchar2char(result, workspace, result_size, collid);
pfree(workspace);
}
- else
#endif /* USE_WIDE_UPPER_LOWER */
+ else
{
+ pg_locale_t mylocale = 0;
char *p;
+ if (collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
result = pnstrdup(buff, nbytes);
+ /*
+ * Note: we assume that toupper_l()/tolower_l() will not be so broken
+ * as to need guard tests. When using the default collation, we apply
+ * the traditional Postgres behavior that forces ASCII-style treatment
+ * of I/i, but in non-default collations you get exactly what the
+ * collation says.
+ */
for (p = result; *p; p++)
{
- if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ {
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, mylocale);
+ else
+ *p = toupper_l((unsigned char) *p, mylocale);
+ wasalnum = isalnum_l((unsigned char) *p, mylocale);
+ }
else
- *p = pg_toupper((unsigned char) *p);
- wasalnum = isalnum((unsigned char) *p);
+#endif
+ {
+ if (wasalnum)
+ *p = pg_tolower((unsigned char) *p);
+ else
+ *p = pg_toupper((unsigned char) *p);
+ wasalnum = isalnum((unsigned char) *p);
+ }
}
}