diff options
Diffstat (limited to 'src/backend/commands/collationcmds.c')
-rw-r--r-- | src/backend/commands/collationcmds.c | 327 |
1 files changed, 210 insertions, 117 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 1a62b3e30aa..a0b3b238163 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -37,6 +37,14 @@ #include "utils/syscache.h" +typedef struct +{ + char *localename; /* name of locale, as per "locale -a" */ + char *alias; /* shortened alias for same */ + int enc; /* encoding */ +} CollAliasData; + + /* * CREATE COLLATION */ @@ -196,7 +204,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate, collctype, collversion, - if_not_exists); + if_not_exists, + false); /* not quiet */ if (!OidIsValid(newoid)) return InvalidObjectAddress; @@ -344,13 +353,18 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) } +/* will we use "locale -a" in pg_import_system_collations? */ +#if defined(HAVE_LOCALE_T) && !defined(WIN32) +#define READ_LOCALE_A_OUTPUT +#endif + +#ifdef READ_LOCALE_A_OUTPUT /* * "Normalize" a libc locale name, stripping off encoding tags such as * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" * -> "br_FR@euro"). Return true if a new, different name was * generated. */ -pg_attribute_unused() static bool normalize_libc_locale_name(char *new, const char *old) { @@ -379,6 +393,20 @@ normalize_libc_locale_name(char *new, const char *old) return changed; } +/* + * qsort comparator for CollAliasData items + */ +static int +cmpaliases(const void *a, const void *b) +{ + const CollAliasData *ca = (const CollAliasData *) a; + const CollAliasData *cb = (const CollAliasData *) b; + + /* comparing localename is enough because other fields are derived */ + return strcmp(ca->localename, cb->localename); +} +#endif /* READ_LOCALE_A_OUTPUT */ + #ifdef USE_ICU /* @@ -429,140 +457,190 @@ get_icu_locale_comment(const char *localename) #endif /* USE_ICU */ +/* + * pg_import_system_collations: add known system collations to pg_collation + */ Datum pg_import_system_collations(PG_FUNCTION_ARGS) { - bool if_not_exists = PG_GETARG_BOOL(0); - Oid nspid = PG_GETARG_OID(1); + Oid nspid = PG_GETARG_OID(0); + int ncreated = 0; -#if defined(HAVE_LOCALE_T) && !defined(WIN32) - FILE *locale_a_handle; - char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ - int count = 0; - List *aliaslist = NIL; - List *localelist = NIL; - List *enclist = NIL; - ListCell *lca, - *lcl, - *lce; -#endif + /* silence compiler warning if we have no locale implementation at all */ + (void) nspid; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to import system collations")))); -#if !(defined(HAVE_LOCALE_T) && !defined(WIN32)) && !defined(USE_ICU) - /* silence compiler warnings */ - (void) if_not_exists; - (void) nspid; -#endif + /* Load collations known to libc, using "locale -a" to enumerate them */ +#ifdef READ_LOCALE_A_OUTPUT + { + FILE *locale_a_handle; + char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ + int nvalid = 0; + Oid collid; + CollAliasData *aliases; + int naliases, + maxaliases, + i; + + /* expansible array of aliases */ + maxaliases = 100; + aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData)); + naliases = 0; + + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); -#if defined(HAVE_LOCALE_T) && !defined(WIN32) - locale_a_handle = OpenPipeStream("locale -a", "r"); - if (locale_a_handle == NULL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not execute command \"%s\": %m", - "locale -a"))); + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + { + size_t len; + int enc; + bool skip; + char alias[NAMEDATALEN]; - while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) - { - int i; - size_t len; - int enc; - bool skip; - char alias[NAMEDATALEN]; + len = strlen(localebuf); - len = strlen(localebuf); + if (len == 0 || localebuf[len - 1] != '\n') + { + elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; - if (len == 0 || localebuf[len - 1] != '\n') - { - elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); - continue; - } - localebuf[len - 1] = '\0'; + /* + * Some systems have locale names that don't consist entirely of + * ASCII letters (such as "bokmål" or "français"). + * This is pretty silly, since we need the locale itself to + * interpret the non-ASCII characters. We can't do much with + * those, so we filter them out. + */ + skip = false; + for (i = 0; i < len; i++) + { + if (IS_HIGHBIT_SET(localebuf[i])) + { + skip = true; + break; + } + } + if (skip) + { + elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); + continue; + } - /* - * Some systems have locale names that don't consist entirely of ASCII - * letters (such as "bokmål" or "français"). This is - * pretty silly, since we need the locale itself to interpret the - * non-ASCII characters. We can't do much with those, so we filter - * them out. - */ - skip = false; - for (i = 0; i < len; i++) - { - if (IS_HIGHBIT_SET(localebuf[i])) + enc = pg_get_encoding_from_locale(localebuf, false); + if (enc < 0) { - skip = true; - break; + /* error message printed by pg_get_encoding_from_locale() */ + continue; } - } - if (skip) - { - elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); - continue; - } + if (!PG_VALID_BE_ENCODING(enc)) + continue; /* ignore locales for client-only encodings */ + if (enc == PG_SQL_ASCII) + continue; /* C/POSIX are already in the catalog */ - enc = pg_get_encoding_from_locale(localebuf, false); - if (enc < 0) - { - /* error message printed by pg_get_encoding_from_locale() */ - continue; - } - if (!PG_VALID_BE_ENCODING(enc)) - continue; /* ignore locales for client-only encodings */ - if (enc == PG_SQL_ASCII) - continue; /* C/POSIX are already in the catalog */ + /* count valid locales found in operating system */ + nvalid++; - count++; + /* + * Create a collation named the same as the locale, but quietly + * doing nothing if it already exists. This is the behavior we + * need even at initdb time, because some versions of "locale -a" + * can report the same locale name more than once. And it's + * convenient for later import runs, too, since you just about + * always want to add on new locales without a lot of chatter + * about existing ones. + */ + collid = CollationCreate(localebuf, nspid, GetUserId(), + COLLPROVIDER_LIBC, enc, + localebuf, localebuf, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + /* Must do CCI between inserts to handle duplicates correctly */ + CommandCounterIncrement(); + } - CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, - localebuf, localebuf, - get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), - if_not_exists); + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" + * for ease of use. Note that collation names are unique per + * encoding only, so this doesn't clash with "en_US" for LATIN1, + * say. + * + * However, it might conflict with a name we'll see later in the + * "locale -a" output. So save up the aliases and try to add them + * after we've read all the output. + */ + if (normalize_libc_locale_name(alias, localebuf)) + { + if (naliases >= maxaliases) + { + maxaliases *= 2; + aliases = (CollAliasData *) + repalloc(aliases, maxaliases * sizeof(CollAliasData)); + } + aliases[naliases].localename = pstrdup(localebuf); + aliases[naliases].alias = pstrdup(alias); + aliases[naliases].enc = enc; + naliases++; + } + } - CommandCounterIncrement(); + ClosePipeStream(locale_a_handle); /* - * Generate aliases such as "en_US" in addition to "en_US.utf8" for - * ease of use. Note that collation names are unique per encoding - * only, so this doesn't clash with "en_US" for LATIN1, say. - * - * However, it might conflict with a name we'll see later in the - * "locale -a" output. So save up the aliases and try to add them - * after we've read all the output. + * Before processing the aliases, sort them by locale name. The point + * here is that if "locale -a" gives us multiple locale names with the + * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we + * want to pick a deterministic one of them. First in ASCII sort + * order is a good enough rule. (Before PG 10, the code corresponding + * to this logic in initdb.c had an additional ordering rule, to + * prefer the locale name exactly matching the alias, if any. We + * don't need to consider that here, because we would have already + * created such a pg_collation entry above, and that one will win.) */ - if (normalize_libc_locale_name(alias, localebuf)) + if (naliases > 1) + qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases); + + /* Now add aliases, ignoring any that match pre-existing entries */ + for (i = 0; i < naliases; i++) { - aliaslist = lappend(aliaslist, pstrdup(alias)); - localelist = lappend(localelist, pstrdup(localebuf)); - enclist = lappend_int(enclist, enc); - } - } + char *locale = aliases[i].localename; + char *alias = aliases[i].alias; + int enc = aliases[i].enc; + + collid = CollationCreate(alias, nspid, GetUserId(), + COLLPROVIDER_LIBC, enc, + locale, locale, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + ncreated++; - ClosePipeStream(locale_a_handle); + CommandCounterIncrement(); + } + } - /* Now try to add any aliases we created */ - forthree(lca, aliaslist, lcl, localelist, lce, enclist) - { - char *alias = (char *) lfirst(lca); - char *locale = (char *) lfirst(lcl); - int enc = lfirst_int(lce); - - CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, - locale, locale, - get_collation_actual_version(COLLPROVIDER_LIBC, locale), - true); - CommandCounterIncrement(); + /* Give a warning if "locale -a" seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); } +#endif /* READ_LOCALE_A_OUTPUT */ - if (count == 0) - ereport(WARNING, - (errmsg("no usable system locales were found"))); -#endif /* not HAVE_LOCALE_T && not WIN32 */ - + /* Load collations known to ICU */ #ifdef USE_ICU if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) { @@ -597,13 +675,20 @@ pg_import_system_collations(PG_FUNCTION_ARGS) langtag = get_icu_language_tag(name); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name; collid = CollationCreate(psprintf("%s-x-icu", langtag), - nspid, GetUserId(), COLLPROVIDER_ICU, -1, + nspid, GetUserId(), + COLLPROVIDER_ICU, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), - if_not_exists); + true, true); + if (OidIsValid(collid)) + { + ncreated++; - CreateComments(collid, CollationRelationId, 0, - get_icu_locale_comment(name)); + CommandCounterIncrement(); + + CreateComments(collid, CollationRelationId, 0, + get_icu_locale_comment(name)); + } /* * Add keyword variants @@ -624,12 +709,20 @@ pg_import_system_collations(PG_FUNCTION_ARGS) langtag = get_icu_language_tag(localeid); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid; collid = CollationCreate(psprintf("%s-x-icu", langtag), - nspid, GetUserId(), COLLPROVIDER_ICU, -1, + nspid, GetUserId(), + COLLPROVIDER_ICU, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), - if_not_exists); - CreateComments(collid, CollationRelationId, 0, - get_icu_locale_comment(localeid)); + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + + CreateComments(collid, CollationRelationId, 0, + get_icu_locale_comment(localeid)); + } } if (U_FAILURE(status)) ereport(ERROR, @@ -638,7 +731,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) uenum_close(en); } } -#endif +#endif /* USE_ICU */ - PG_RETURN_VOID(); + PG_RETURN_INT32(ncreated); } |