diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-03-20 12:43:39 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-03-20 12:44:13 -0400 |
commit | 176d5bae1d636fc1e91840b12cbd04c96d638b7e (patch) | |
tree | f861d3f9d9eb2bead0cd932e7825271fb1fbc1e1 /src | |
parent | c2f4ea469b52e6f7fedff651a4aa0acced873a5f (diff) | |
download | postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.tar.gz postgresql-176d5bae1d636fc1e91840b12cbd04c96d638b7e.zip |
Fix up handling of C/POSIX collations.
Install just one instance of the "C" and "POSIX" collations into
pg_collation, rather than one per encoding. Make these instances exist
and do something useful even on machines without locale_t support: to wit,
it's now possible to force comparisons and case-folding functions to use C
locale in an otherwise non-C database, whether or not the platform has
support for using any additional collations.
Fix up severely broken upper/lower/initcap functions, too: the C/POSIX
fastpath now does what it is supposed to, and non-default collations are
handled correctly in single-byte database encodings.
Merge the two separate collation hashtables that were being maintained in
pg_locale.c, and be more wary of the possibility that we fail partway
through filling a cache entry.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/formatting.c | 163 | ||||
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 488 | ||||
-rw-r--r-- | src/bin/initdb/initdb.c | 18 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
-rw-r--r-- | src/include/catalog/pg_collation.h | 15 | ||||
-rw-r--r-- | src/include/port.h | 2 | ||||
-rw-r--r-- | src/port/pgstrcasecmp.c | 26 |
7 files changed, 461 insertions, 253 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index aba11459bb1..54783103a2c 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1462,10 +1462,16 @@ str_numth(char *dest, char *num, int type) * in multibyte character sets. Note that in either case we are effectively * assuming that the database character encoding matches the encoding implied * by LC_CTYPE. + * + * If the system provides locale_t and associated functions (which are + * standardized by Open Group's XBD), we can support collations that are + * neither default nor C. The code is written to handle both combinations + * of have-wide-characters and have-locale_t, though it's rather unlikely + * a platform would have the latter without the former. */ /* - * wide-character-aware lower function + * collation-aware, wide-character-aware lower function * * We pass the number of bytes so we can pass varlena and char* * to this function. The result is a palloc'd, null-terminated string. 
@@ -1474,21 +1480,31 @@ char * str_tolower(const char *buff, size_t nbytes, Oid collid) { char *result; - pg_locale_t mylocale = 0; if (!buff) return NULL; - if (collid != DEFAULT_COLLATION_OID) - mylocale = pg_newlocale_from_collation(collid); + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + char *p; + + result = pnstrdup(buff, nbytes); + for (p = result; *p; p++) + *p = pg_ascii_tolower((unsigned char) *p); + } #ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid)) + else if (pg_database_encoding_max_length() > 1) { + pg_locale_t mylocale = 0; wchar_t *workspace; size_t curr_char; size_t result_size; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + /* Overflow paranoia */ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) ereport(ERROR, @@ -1501,12 +1517,14 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) char2wchar(workspace, nbytes + 1, buff, nbytes, collid); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T if (mylocale) workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); else #endif - workspace[curr_char] = towlower(workspace[curr_char]); + workspace[curr_char] = towlower(workspace[curr_char]); + } /* Make result large enough; case change might change number of bytes */ result_size = curr_char * pg_database_encoding_max_length() + 1; @@ -1515,22 +1533,40 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) wchar2char(result, workspace, result_size, collid); pfree(workspace); } - else #endif /* USE_WIDE_UPPER_LOWER */ + else { + pg_locale_t mylocale = 0; char *p; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + result = pnstrdup(buff, nbytes); + /* + * Note: we assume that tolower_l() will not be so broken as to need + * an isupper_l() guard test. 
When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ for (p = result; *p; p++) - *p = pg_tolower((unsigned char) *p); + { +#ifdef HAVE_LOCALE_T + if (mylocale) + *p = tolower_l((unsigned char) *p, mylocale); + else +#endif + *p = pg_tolower((unsigned char) *p); + } } return result; } /* - * wide-character-aware upper function + * collation-aware, wide-character-aware upper function * * We pass the number of bytes so we can pass varlena and char* * to this function. The result is a palloc'd, null-terminated string. @@ -1539,21 +1575,31 @@ char * str_toupper(const char *buff, size_t nbytes, Oid collid) { char *result; - pg_locale_t mylocale = 0; if (!buff) return NULL; - if (collid != DEFAULT_COLLATION_OID) - mylocale = pg_newlocale_from_collation(collid); + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + char *p; + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + *p = pg_ascii_toupper((unsigned char) *p); + } #ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid)) + else if (pg_database_encoding_max_length() > 1) { + pg_locale_t mylocale = 0; wchar_t *workspace; size_t curr_char; size_t result_size; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + /* Overflow paranoia */ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) ereport(ERROR, @@ -1566,12 +1612,14 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) char2wchar(workspace, nbytes + 1, buff, nbytes, collid); for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T if (mylocale) workspace[curr_char] = towupper_l(workspace[curr_char], mylocale); else #endif - workspace[curr_char] = towupper(workspace[curr_char]); + workspace[curr_char] = towupper(workspace[curr_char]); + } 
/* Make result large enough; case change might change number of bytes */ result_size = curr_char * pg_database_encoding_max_length() + 1; @@ -1580,22 +1628,40 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) wchar2char(result, workspace, result_size, collid); pfree(workspace); } - else #endif /* USE_WIDE_UPPER_LOWER */ + else { + pg_locale_t mylocale = 0; char *p; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + result = pnstrdup(buff, nbytes); + /* + * Note: we assume that toupper_l() will not be so broken as to need + * an islower_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ for (p = result; *p; p++) - *p = pg_toupper((unsigned char) *p); + { +#ifdef HAVE_LOCALE_T + if (mylocale) + *p = toupper_l((unsigned char) *p, mylocale); + else +#endif + *p = pg_toupper((unsigned char) *p); + } } return result; } /* - * wide-character-aware initcap function + * collation-aware, wide-character-aware initcap function * * We pass the number of bytes so we can pass varlena and char* * to this function. The result is a palloc'd, null-terminated string. 
@@ -1605,21 +1671,42 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) { char *result; int wasalnum = false; - pg_locale_t mylocale = 0; if (!buff) return NULL; - if (collid != DEFAULT_COLLATION_OID) - mylocale = pg_newlocale_from_collation(collid); + /* C/POSIX collations use this path regardless of database encoding */ + if (lc_ctype_is_c(collid)) + { + char *p; + + result = pnstrdup(buff, nbytes); + for (p = result; *p; p++) + { + char c; + + if (wasalnum) + *p = c = pg_ascii_tolower((unsigned char) *p); + else + *p = c = pg_ascii_toupper((unsigned char) *p); + /* we don't trust isalnum() here */ + wasalnum = ((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9')); + } + } #ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collid)) + else if (pg_database_encoding_max_length() > 1) { + pg_locale_t mylocale = 0; wchar_t *workspace; size_t curr_char; size_t result_size; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + /* Overflow paranoia */ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) ereport(ERROR, @@ -1660,20 +1747,44 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) wchar2char(result, workspace, result_size, collid); pfree(workspace); } - else #endif /* USE_WIDE_UPPER_LOWER */ + else { + pg_locale_t mylocale = 0; char *p; + if (collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + result = pnstrdup(buff, nbytes); + /* + * Note: we assume that toupper_l()/tolower_l() will not be so broken + * as to need guard tests. When using the default collation, we apply + * the traditional Postgres behavior that forces ASCII-style treatment + * of I/i, but in non-default collations you get exactly what the + * collation says. 
+ */ for (p = result; *p; p++) { - if (wasalnum) - *p = pg_tolower((unsigned char) *p); +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale); + else + *p = toupper_l((unsigned char) *p, mylocale); + wasalnum = isalnum_l((unsigned char) *p, mylocale); + } else - *p = pg_toupper((unsigned char) *p); - wasalnum = isalnum((unsigned char) *p); +#endif + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + wasalnum = isalnum((unsigned char) *p); + } } } diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 2b9b321b263..15d347c4f89 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -99,15 +99,24 @@ static char lc_monetary_envbuf[LC_ENV_BUFSIZE]; static char lc_numeric_envbuf[LC_ENV_BUFSIZE]; static char lc_time_envbuf[LC_ENV_BUFSIZE]; +/* Cache for collation-related knowledge */ + +typedef struct +{ + Oid collid; /* hash key: pg_collation OID */ + bool collate_is_c; /* is collation's LC_COLLATE C? */ + bool ctype_is_c; /* is collation's LC_CTYPE C? */ + bool flags_valid; /* true if above flags are valid */ + pg_locale_t locale; /* locale_t struct, or 0 if not valid */ +} collation_cache_entry; + +static HTAB *collation_cache = NULL; + + #if defined(WIN32) && defined(LC_MESSAGES) static char *IsoLocaleName(const char *); /* MSVC specific */ #endif -static HTAB *locale_cness_cache = NULL; -#ifdef HAVE_LOCALE_T -static HTAB *locale_t_cache = NULL; -#endif - /* * pg_perm_setlocale @@ -313,136 +322,6 @@ locale_messages_assign(const char *value, bool doit, GucSource source) /* - * We'd like to cache whether LC_COLLATE or LC_CTYPE is C (or POSIX), - * so we can optimize a few code paths in various places. - * - * Note that some code relies on this not reporting false negatives - * (that is, saying it's not C when it is). 
For example, char2wchar() - * could fail if the locale is C, so str_tolower() shouldn't call it - * in that case. - */ - -struct locale_cness_cache_entry -{ - Oid collid; - bool collate_is_c; - bool ctype_is_c; -}; - -static void -init_locale_cness_cache(void) -{ - HASHCTL ctl; - - memset(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(struct locale_cness_cache_entry); - ctl.hash = oid_hash; - locale_cness_cache = hash_create("locale C-ness cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION); -} - -/* - * Handle caching of locale "C-ness" for nondefault collation objects. - * Relying on the system cache directly isn't fast enough. - */ -static bool -lookup_collation_cness(Oid collation, int category) -{ - struct locale_cness_cache_entry *cache_entry; - bool found; - HeapTuple tp; - char *localeptr; - - Assert(OidIsValid(collation)); - Assert(category == LC_COLLATE || category == LC_CTYPE); - - if (!locale_cness_cache) - init_locale_cness_cache(); - - cache_entry = hash_search(locale_cness_cache, &collation, HASH_ENTER, &found); - if (found) - { - if (category == LC_COLLATE) - return cache_entry->collate_is_c; - else - return cache_entry->ctype_is_c; - } - - tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for collation %u", collation); - - localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate); - cache_entry->collate_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0); - - localeptr = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype); - cache_entry->ctype_is_c = (strcmp(localeptr, "C") == 0) || (strcmp(localeptr, "POSIX") == 0); - - ReleaseSysCache(tp); - - return category == LC_COLLATE ? 
cache_entry->collate_is_c : cache_entry->ctype_is_c; -} - - -bool -lc_collate_is_c(Oid collation) -{ - /* Cache result so we only have to compute it once */ - static int result = -1; - char *localeptr; - - if (!OidIsValid(collation)) - return false; - - if (collation != DEFAULT_COLLATION_OID) - return lookup_collation_cness(collation, LC_COLLATE); - - if (result >= 0) - return (bool) result; - localeptr = setlocale(LC_COLLATE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_COLLATE setting"); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else - result = false; - return (bool) result; -} - - -bool -lc_ctype_is_c(Oid collation) -{ - /* Cache result so we only have to compute it once */ - static int result = -1; - char *localeptr; - - if (!OidIsValid(collation)) - return false; - - if (collation != DEFAULT_COLLATION_OID) - return lookup_collation_cness(collation, LC_CTYPE); - - if (result >= 0) - return (bool) result; - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else - result = false; - return (bool) result; -} - - -/* * Frees the malloced content of a struct lconv. (But not the struct * itself.) */ @@ -844,116 +723,295 @@ IsoLocaleName(const char *winlocname) #endif /* WIN32 && LC_MESSAGES */ -#ifdef HAVE_LOCALE_T -struct locale_t_cache_entry +/* + * Cache mechanism for collation information. + * + * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C + * (or POSIX), so we can optimize a few code paths in various places. + * For the built-in C and POSIX collations, we can know that without even + * doing a cache lookup, but we want to support aliases for C/POSIX too. 
+ * For the "default" collation, there are separate static cache variables, + * since consulting the pg_collation catalog doesn't tell us what we need. + * + * Also, if a pg_locale_t has been requested for a collation, we cache that + * for the life of a backend. + * + * Note that some code relies on the flags not reporting false negatives + * (that is, saying it's not C when it is). For example, char2wchar() + * could fail if the locale is C, so str_tolower() shouldn't call it + * in that case. + * + * Note that we currently lack any way to flush the cache. Since we don't + * support ALTER COLLATION, this is OK. The worst case is that someone + * drops a collation, and a useless cache entry hangs around in existing + * backends. + */ + +static collation_cache_entry * +lookup_collation_cache(Oid collation, bool set_flags) { - Oid collid; - locale_t locale; -}; + collation_cache_entry *cache_entry; + bool found; -static void -init_locale_t_cache(void) + Assert(OidIsValid(collation)); + Assert(collation != DEFAULT_COLLATION_OID); + + if (collation_cache == NULL) + { + /* First time through, initialize the hash table */ + HASHCTL ctl; + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(collation_cache_entry); + ctl.hash = oid_hash; + collation_cache = hash_create("Collation cache", 100, &ctl, + HASH_ELEM | HASH_FUNCTION); + } + + cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found); + if (!found) + { + /* + * Make sure cache entry is marked invalid, in case we fail before + * setting things. 
+ */ + cache_entry->flags_valid = false; + cache_entry->locale = 0; + } + + if (set_flags && !cache_entry->flags_valid) + { + /* Attempt to set the flags */ + HeapTuple tp; + Form_pg_collation collform; + const char *collcollate; + const char *collctype; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collation); + collform = (Form_pg_collation) GETSTRUCT(tp); + + collcollate = NameStr(collform->collcollate); + collctype = NameStr(collform->collctype); + + cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) || + (strcmp(collcollate, "POSIX") == 0)); + cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) || + (strcmp(collctype, "POSIX") == 0)); + + cache_entry->flags_valid = true; + + ReleaseSysCache(tp); + } + + return cache_entry; +} + + +/* + * Detect whether collation's LC_COLLATE property is C + */ +bool +lc_collate_is_c(Oid collation) { - HASHCTL ctl; + /* + * If we're asked about "collation 0", return false, so that the code + * will go into the non-C path and report that the collation is bogus. + */ + if (!OidIsValid(collation)) + return false; + + /* + * If we're asked about the default collation, we have to inquire of + * the C library. Cache the result so we only have to compute it once. + */ + if (collation == DEFAULT_COLLATION_OID) + { + static int result = -1; + char *localeptr; + + if (result >= 0) + return (bool) result; + localeptr = setlocale(LC_COLLATE, NULL); + if (!localeptr) + elog(ERROR, "invalid LC_COLLATE setting"); + + if (strcmp(localeptr, "C") == 0) + result = true; + else if (strcmp(localeptr, "POSIX") == 0) + result = true; + else + result = false; + return (bool) result; + } + + /* + * If we're asked about the built-in C/POSIX collations, we know that. + */ + if (collation == C_COLLATION_OID || + collation == POSIX_COLLATION_OID) + return true; + + /* + * Otherwise, we have to consult pg_collation, but we cache that. 
+ */ + return (lookup_collation_cache(collation, true))->collate_is_c; +} + +/* + * Detect whether collation's LC_CTYPE property is C + */ +bool +lc_ctype_is_c(Oid collation) +{ + /* + * If we're asked about "collation 0", return false, so that the code + * will go into the non-C path and report that the collation is bogus. + */ + if (!OidIsValid(collation)) + return false; + + /* + * If we're asked about the default collation, we have to inquire of + * the C library. Cache the result so we only have to compute it once. + */ + if (collation == DEFAULT_COLLATION_OID) + { + static int result = -1; + char *localeptr; + + if (result >= 0) + return (bool) result; + localeptr = setlocale(LC_CTYPE, NULL); + if (!localeptr) + elog(ERROR, "invalid LC_CTYPE setting"); + + if (strcmp(localeptr, "C") == 0) + result = true; + else if (strcmp(localeptr, "POSIX") == 0) + result = true; + else + result = false; + return (bool) result; + } + + /* + * If we're asked about the built-in C/POSIX collations, we know that. + */ + if (collation == C_COLLATION_OID || + collation == POSIX_COLLATION_OID) + return true; - memset(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(struct locale_t_cache_entry); - ctl.hash = oid_hash; - locale_t_cache = hash_create("locale_t cache", 1000, &ctl, HASH_ELEM | HASH_FUNCTION); + /* + * Otherwise, we have to consult pg_collation, but we cache that. + */ + return (lookup_collation_cache(collation, true))->ctype_is_c; } -#endif /* HAVE_LOCALE_T */ + /* * Create a locale_t from a collation OID. Results are cached for the - * lifetime of the backend. Thus, do not free the result with - * freelocale(). + * lifetime of the backend. Thus, do not free the result with freelocale(). * - * As a special optimization, the default/database collation returns - * 0. Callers should then revert to the non-locale_t-enabled code - * path. In fact, they shouldn't call this function at all when they - * are dealing with the default locale. 
That can save quite a bit in - * hotspots. + * As a special optimization, the default/database collation returns 0. + * Callers should then revert to the non-locale_t-enabled code path. + * In fact, they shouldn't call this function at all when they are dealing + * with the default locale. That can save quite a bit in hotspots. + * Also, callers should avoid calling this before going down a C/POSIX + * fastpath, because such a fastpath should work even on platforms without + * locale_t support in the C library. * * For simplicity, we always generate COLLATE + CTYPE even though we - * might only need one of them. Since this is called only once per - * session, it shouldn't cost much. + * might only need one of them. Since this is called only once per session, + * it shouldn't cost much. */ pg_locale_t pg_newlocale_from_collation(Oid collid) { -#ifdef HAVE_LOCALE_T - HeapTuple tp; - const char *collcollate; - const char *collctype; - locale_t result; - struct locale_t_cache_entry *cache_entry; - bool found; + collation_cache_entry *cache_entry; + /* Return 0 for "default" collation, just in case caller forgets */ if (collid == DEFAULT_COLLATION_OID) - return (locale_t) 0; + return (pg_locale_t) 0; + /* + * This is where we'll fail if a collation-aware function is invoked + * and no collation OID is passed. This typically means that the + * parser could not resolve a conflict of implicit collations, so + * report it that way. 
+ */ if (!OidIsValid(collid)) - elog(ERROR, "locale operation to be invoked, but no collation was derived"); + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("locale operation to be invoked, but no collation was derived"))); - if (!locale_t_cache) - init_locale_t_cache(); + cache_entry = lookup_collation_cache(collid, false); - cache_entry = hash_search(locale_t_cache, &collid, HASH_ENTER, &found); - if (found) - return cache_entry->locale; + if (cache_entry->locale == 0) + { + /* We haven't computed this yet in this session, so do it */ +#ifdef HAVE_LOCALE_T + HeapTuple tp; + Form_pg_collation collform; + const char *collcollate; + const char *collctype; + locale_t result; - tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for collation %u", collid); + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + collform = (Form_pg_collation) GETSTRUCT(tp); - collcollate = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate); - collctype = NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype); + collcollate = NameStr(collform->collcollate); + collctype = NameStr(collform->collctype); - if (strcmp(collcollate, collctype) == 0) - { - result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, NULL); - if (!result) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create locale \"%s\": %m", collcollate))); - } - else - { - locale_t loc1; - - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); - if (!loc1) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create locale \"%s\": %m", collcollate))); - result = newlocale(LC_CTYPE_MASK, collctype, loc1); - if (!result) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create locale \"%s\": %m", collctype))); - } + if (strcmp(collcollate, collctype) == 0) + { + /* Normal case 
where they're the same */ + result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, + NULL); + if (!result) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create locale \"%s\": %m", + collcollate))); + } + else + { + /* We need two newlocale() steps */ + locale_t loc1; + + loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + if (!loc1) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create locale \"%s\": %m", + collcollate))); + result = newlocale(LC_CTYPE_MASK, collctype, loc1); + if (!result) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create locale \"%s\": %m", + collctype))); + } - ReleaseSysCache(tp); + cache_entry->locale = result; - cache_entry->locale = result; + ReleaseSysCache(tp); - return result; #else /* not HAVE_LOCALE_T */ - /* - * For platforms that don't support locale_t, check that we are - * dealing with the default locale. It's unlikely that we'll get - * here, but it's possible if users are creating collations even - * though they are not supported, or they are mixing builds in odd - * ways. - */ - if (!OidIsValid(collid)) - elog(ERROR, "locale operation to be invoked, but no collation was derived"); - else if (collid != DEFAULT_COLLATION_OID) + + /* + * For platforms that don't support locale_t, we can't do anything + * with non-default collations. 
+ */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondefault collations are not supported on this platform"))); - - return 0; #endif /* not HAVE_LOCALE_T */ + } + + return cache_entry->locale; } diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index d509b1311d1..98e864d5495 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1616,17 +1616,21 @@ setup_collation(void) */ skip = false; for (i = 0; i < len; i++) + { if (IS_HIGHBIT_SET(localebuf[i])) { - if (debug) - fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"), - progname, localebuf); - skipped++; skip = true; break; } + } if (skip) + { + if (debug) + fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"), + progname, localebuf); + skipped++; continue; + } enc = pg_get_encoding_from_locale(localebuf, debug); if (enc < 0) @@ -1635,7 +1639,7 @@ setup_collation(void) continue; /* error message printed by pg_get_encoding_from_locale() */ } if (enc == PG_SQL_ASCII) - continue; /* SQL_ASCII is handled separately */ + continue; /* C/POSIX are already in the catalog */ PG_CMD_PRINTF2("INSERT INTO tmp_pg_collation (locale, encoding) VALUES ('%s', %d);", escape_quotes(localebuf), enc); @@ -1651,10 +1655,6 @@ setup_collation(void) escape_quotes(alias), escape_quotes(localebuf), enc); } - for (i = PG_SQL_ASCII; i <= PG_ENCODING_BE_LAST; i++) - PG_CMD_PRINTF2("INSERT INTO tmp_pg_collation (locale, encoding) VALUES ('C', %d), ('POSIX', %d);", - i, i); - /* Add an SQL-standard name */ PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation (collname, locale, encoding) VALUES ('ucs_basic', 'C', %d);", PG_UTF8); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index c10de537bec..e9659092198 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201103191 +#define CATALOG_VERSION_NO 201103201 #endif diff --git 
a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index e90aa050f54..2ab0c504f65 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -58,8 +58,19 @@ typedef FormData_pg_collation *Form_pg_collation; #define Anum_pg_collation_collcollate 5 #define Anum_pg_collation_collctype 6 -DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" )); +/* ---------------- + * initial contents of pg_collation + * ---------------- + */ + +DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" )); DESCR("database's default collation"); -#define DEFAULT_COLLATION_OID 100 +#define DEFAULT_COLLATION_OID 100 +DATA(insert OID = 950 ( C PGNSP PGUID -1 "C" "C" )); +DESCR("standard C collation"); +#define C_COLLATION_OID 950 +DATA(insert OID = 951 ( POSIX PGNSP PGUID -1 "POSIX" "POSIX" )); +DESCR("standard POSIX collation"); +#define POSIX_COLLATION_OID 951 #endif /* PG_COLLATION_H */ diff --git a/src/include/port.h b/src/include/port.h index 9d08b392ce1..1116a92fd1f 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -155,6 +155,8 @@ extern int pg_strcasecmp(const char *s1, const char *s2); extern int pg_strncasecmp(const char *s1, const char *s2, size_t n); extern unsigned char pg_toupper(unsigned char ch); extern unsigned char pg_tolower(unsigned char ch); +extern unsigned char pg_ascii_toupper(unsigned char ch); +extern unsigned char pg_ascii_tolower(unsigned char ch); #ifdef USE_REPL_SNPRINTF diff --git a/src/port/pgstrcasecmp.c b/src/port/pgstrcasecmp.c index 1680124df0d..f6e226f0f2c 100644 --- a/src/port/pgstrcasecmp.c +++ b/src/port/pgstrcasecmp.c @@ -13,6 +13,10 @@ * * NB: this code should match downcase_truncate_identifier() in scansup.c. * + * We also provide strict ASCII-only case conversion functions, which can + * be used to implement C/POSIX case folding semantics no matter what the + * C library thinks the locale is. 
+ * * * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group * @@ -123,3 +127,25 @@ pg_tolower(unsigned char ch) ch = tolower(ch); return ch; } + +/* + * Fold a character to upper case, following C/POSIX locale rules. + */ +unsigned char +pg_ascii_toupper(unsigned char ch) +{ + if (ch >= 'a' && ch <= 'z') + ch += 'A' - 'a'; + return ch; +} + +/* + * Fold a character to lower case, following C/POSIX locale rules. + */ +unsigned char +pg_ascii_tolower(unsigned char ch) +{ + if (ch >= 'A' && ch <= 'Z') + ch += 'a' - 'A'; + return ch; +} |