diff options
Diffstat (limited to 'src')
34 files changed, 2998 insertions, 393 deletions
diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 8e1d6e3bd44..4acf7d2f060 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -179,6 +179,7 @@ pgxsdir = $(pkglibdir)/pgxs # # Records the choice of the various --enable-xxx and --with-xxx options. +with_icu = @with_icu@ with_perl = @with_perl@ with_python = @with_python@ with_tcl = @with_tcl@ @@ -208,6 +209,9 @@ python_version = @python_version@ krb_srvtab = @krb_srvtab@ +ICU_CFLAGS = @ICU_CFLAGS@ +ICU_LIBS = @ICU_LIBS@ + TCLSH = @TCLSH@ TCL_LIBS = @TCL_LIBS@ TCL_LIB_SPEC = @TCL_LIB_SPEC@ diff --git a/src/backend/Makefile b/src/backend/Makefile index 7a0bbb29424..fffb0d95bad 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -58,7 +58,7 @@ ifneq ($(PORTNAME), win32) ifneq ($(PORTNAME), aix) postgres: $(OBJS) - $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_EX) $(export_dynamic) $(call expand_subsys,$^) $(LIBS) $(ICU_LIBS) -o $@ endif endif diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 65b6051c0d1..ede920955d7 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -27,6 +27,7 @@ #include "mb/pg_wchar.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/pg_locale.h" #include "utils/rel.h" #include "utils/syscache.h" #include "utils/tqual.h" @@ -40,8 +41,10 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, + char collprovider, int32 collencoding, const char *collcollate, const char *collctype, + const char *collversion, bool if_not_exists) { Relation rel; @@ -78,29 +81,47 @@ CollationCreate(const char *collname, Oid collnamespace, { ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_OBJECT), - errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping", - collname, pg_encoding_to_char(collencoding)))); + collencoding == -1 + ? 
errmsg("collation \"%s\" already exists, skipping", + collname) + : errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping", + collname, pg_encoding_to_char(collencoding)))); return InvalidOid; } else ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), - errmsg("collation \"%s\" for encoding \"%s\" already exists", - collname, pg_encoding_to_char(collencoding)))); + collencoding == -1 + ? errmsg("collation \"%s\" already exists", + collname) + : errmsg("collation \"%s\" for encoding \"%s\" already exists", + collname, pg_encoding_to_char(collencoding)))); } + /* open pg_collation; see below about the lock level */ + rel = heap_open(CollationRelationId, ShareRowExclusiveLock); + /* - * Also forbid matching an any-encoding entry. This test of course is not - * backed up by the unique index, but it's not a problem since we don't - * support adding any-encoding entries after initdb. + * Also forbid a specific-encoding collation shadowing an any-encoding + * collation, or an any-encoding collation being shadowed (see + * get_collation_name()). This test is not backed up by the unique index, + * so we take a ShareRowExclusiveLock earlier, to protect against + * concurrent changes fooling this check. 
*/ - if (SearchSysCacheExists3(COLLNAMEENCNSP, - PointerGetDatum(collname), - Int32GetDatum(-1), - ObjectIdGetDatum(collnamespace))) + if ((collencoding == -1 && + SearchSysCacheExists3(COLLNAMEENCNSP, + PointerGetDatum(collname), + Int32GetDatum(GetDatabaseEncoding()), + ObjectIdGetDatum(collnamespace))) || + (collencoding != -1 && + SearchSysCacheExists3(COLLNAMEENCNSP, + PointerGetDatum(collname), + Int32GetDatum(-1), + ObjectIdGetDatum(collnamespace)))) { if (if_not_exists) { + heap_close(rel, NoLock); ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("collation \"%s\" already exists, skipping", @@ -114,8 +135,6 @@ CollationCreate(const char *collname, Oid collnamespace, collname))); } - /* open pg_collation */ - rel = heap_open(CollationRelationId, RowExclusiveLock); tupDesc = RelationGetDescr(rel); /* form a tuple */ @@ -125,11 +144,16 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collname - 1] = NameGetDatum(&name_name); values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace); values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner); + values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); namestrcpy(&name_collate, collcollate); values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate); namestrcpy(&name_ctype, collctype); values[Anum_pg_collation_collctype - 1] = NameGetDatum(&name_ctype); + if (collversion) + values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion); + else + nulls[Anum_pg_collation_collversion - 1] = true; tup = heap_form_tuple(tupDesc, values, nulls); @@ -159,7 +183,7 @@ CollationCreate(const char *collname, Oid collnamespace, InvokeObjectPostCreateHook(CollationRelationId, oid, 0); heap_freetuple(tup); - heap_close(rel, RowExclusiveLock); + heap_close(rel, NoLock); return oid; } diff --git a/src/backend/commands/collationcmds.c 
b/src/backend/commands/collationcmds.c index 919cfc6a067..835cb263db3 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -14,15 +14,18 @@ */ #include "postgres.h" +#include "access/heapam.h" #include "access/htup_details.h" #include "access/xact.h" #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/objectaccess.h" #include "catalog/pg_collation.h" #include "catalog/pg_collation_fn.h" #include "commands/alter.h" #include "commands/collationcmds.h" +#include "commands/comment.h" #include "commands/dbcommands.h" #include "commands/defrem.h" #include "mb/pg_wchar.h" @@ -33,6 +36,7 @@ #include "utils/rel.h" #include "utils/syscache.h" + /* * CREATE COLLATION */ @@ -47,8 +51,14 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *localeEl = NULL; DefElem *lccollateEl = NULL; DefElem *lcctypeEl = NULL; + DefElem *providerEl = NULL; + DefElem *versionEl = NULL; char *collcollate = NULL; char *collctype = NULL; + char *collproviderstr = NULL; + int collencoding; + char collprovider = 0; + char *collversion = NULL; Oid newoid; ObjectAddress address; @@ -72,6 +82,10 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = &lccollateEl; else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0) defelp = &lcctypeEl; + else if (pg_strcasecmp(defel->defname, "provider") == 0) + defelp = &providerEl; + else if (pg_strcasecmp(defel->defname, "version") == 0) + defelp = &versionEl; else { ereport(ERROR, @@ -103,6 +117,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); + collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; ReleaseSysCache(tp); } @@ -119,6 +134,27 @@ DefineCollation(ParseState *pstate, 
List *names, List *parameters, bool if_not_e if (lcctypeEl) collctype = defGetString(lcctypeEl); + if (providerEl) + collproviderstr = defGetString(providerEl); + + if (versionEl) + collversion = defGetString(versionEl); + + if (collproviderstr) + { + if (pg_strcasecmp(collproviderstr, "icu") == 0) + collprovider = COLLPROVIDER_ICU; + else if (pg_strcasecmp(collproviderstr, "libc") == 0) + collprovider = COLLPROVIDER_LIBC; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized collation provider: %s", + collproviderstr))); + } + else if (!fromEl) + collprovider = COLLPROVIDER_LIBC; + if (!collcollate) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -129,14 +165,25 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("parameter \"lc_ctype\" must be specified"))); - check_encoding_locale_matches(GetDatabaseEncoding(), collcollate, collctype); + if (collprovider == COLLPROVIDER_ICU) + collencoding = -1; + else + { + collencoding = GetDatabaseEncoding(); + check_encoding_locale_matches(collencoding, collcollate, collctype); + } + + if (!collversion) + collversion = get_collation_actual_version(collprovider, collcollate); newoid = CollationCreate(collName, collNamespace, GetUserId(), - GetDatabaseEncoding(), + collprovider, + collencoding, collcollate, collctype, + collversion, if_not_exists); if (!OidIsValid(newoid)) @@ -182,16 +229,118 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid) collname, get_namespace_name(nspOid)))); } +/* + * ALTER COLLATION + */ +ObjectAddress +AlterCollation(AlterCollationStmt *stmt) +{ + Relation rel; + Oid collOid; + HeapTuple tup; + Form_pg_collation collForm; + Datum collversion; + bool isnull; + char *oldversion; + char *newversion; + ObjectAddress address; + + rel = heap_open(CollationRelationId, RowExclusiveLock); + collOid = get_collation_oid(stmt->collname, false); + + if 
(!pg_collation_ownercheck(collOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION, + NameListToString(stmt->collname)); + + tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for collation %u", collOid); + + collForm = (Form_pg_collation) GETSTRUCT(tup); + collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, + &isnull); + oldversion = isnull ? NULL : TextDatumGetCString(collversion); + + newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate)); + + /* cannot change from NULL to non-NULL or vice versa */ + if ((!oldversion && newversion) || (oldversion && !newversion)) + elog(ERROR, "invalid collation version change"); + else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) + { + bool nulls[Natts_pg_collation]; + bool replaces[Natts_pg_collation]; + Datum values[Natts_pg_collation]; + + ereport(NOTICE, + (errmsg("changing version from %s to %s", + oldversion, newversion))); + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion); + replaces[Anum_pg_collation_collversion - 1] = true; + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), + values, nulls, replaces); + } + else + ereport(NOTICE, + (errmsg("version has not changed"))); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(CollationRelationId, collOid, 0); + + ObjectAddressSet(address, CollationRelationId, collOid); + + heap_freetuple(tup); + heap_close(rel, NoLock); + + return address; +} + + +Datum +pg_collation_actual_version(PG_FUNCTION_ARGS) +{ + Oid collid = PG_GETARG_OID(0); + HeapTuple tp; + char *collcollate; + char collprovider; + char *version; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + 
ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("collation with OID %u does not exist", collid))); + + collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); + collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + + ReleaseSysCache(tp); + + version = get_collation_actual_version(collprovider, collcollate); + + if (version) + PG_RETURN_TEXT_P(cstring_to_text(version)); + else + PG_RETURN_NULL(); +} + /* - * "Normalize" a locale name, stripping off encoding tags such as + * "Normalize" a libc locale name, stripping off encoding tags such as * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" * -> "br_FR@euro"). Return true if a new, different name was * generated. */ pg_attribute_unused() static bool -normalize_locale_name(char *new, const char *old) +normalize_libc_locale_name(char *new, const char *old) { char *n = new; const char *o = old; @@ -219,6 +368,46 @@ normalize_locale_name(char *new, const char *old) } +#ifdef USE_ICU +static char * +get_icu_language_tag(const char *localename) +{ + char buf[ULOC_FULLNAME_CAPACITY]; + UErrorCode status; + + status = U_ZERO_ERROR; + uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not convert locale name \"%s\" to language tag: %s", + localename, u_errorName(status)))); + + return pstrdup(buf); +} + + +static char * +get_icu_locale_comment(const char *localename) +{ + UErrorCode status; + UChar displayname[128]; + int32 len_uchar; + char *result; + + status = U_ZERO_ERROR; + len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], lengthof(displayname), &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not get display name for locale \"%s\": %s", + localename, u_errorName(status)))); + + icu_from_uchar(&result, displayname, len_uchar); + + return result; +} +#endif /* USE_ICU */ + + Datum pg_import_system_collations(PG_FUNCTION_ARGS) { @@ -302,8 +491,10 @@ 
pg_import_system_collations(PG_FUNCTION_ARGS) count++; - CollationCreate(localebuf, nspid, GetUserId(), enc, - localebuf, localebuf, if_not_exists); + CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, + localebuf, localebuf, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + if_not_exists); CommandCounterIncrement(); @@ -316,7 +507,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * "locale -a" output. So save up the aliases and try to add them * after we've read all the output. */ - if (normalize_locale_name(alias, localebuf)) + if (normalize_libc_locale_name(alias, localebuf)) { aliaslist = lappend(aliaslist, pstrdup(alias)); localelist = lappend(localelist, pstrdup(localebuf)); @@ -333,8 +524,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS) char *locale = (char *) lfirst(lcl); int enc = lfirst_int(lce); - CollationCreate(alias, nspid, GetUserId(), enc, - locale, locale, true); + CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, + locale, locale, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true); CommandCounterIncrement(); } @@ -343,5 +536,82 @@ pg_import_system_collations(PG_FUNCTION_ARGS) (errmsg("no usable system locales were found"))); #endif /* not HAVE_LOCALE_T && not WIN32 */ +#ifdef USE_ICU + if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) + { + ereport(NOTICE, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("encoding \"%s\" not supported by ICU", + pg_encoding_to_char(GetDatabaseEncoding())))); + } + else + { + int i; + + /* + * Start the loop at -1 to sneak in the root locale without too much + * code duplication. 
+ */ + for (i = -1; i < ucol_countAvailable(); i++) + { + const char *name; + char *langtag; + const char *collcollate; + UEnumeration *en; + UErrorCode status; + const char *val; + Oid collid; + + if (i == -1) + name = ""; /* ICU root locale */ + else + name = ucol_getAvailable(i); + + langtag = get_icu_language_tag(name); + collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name; + collid = CollationCreate(psprintf("%s-x-icu", langtag), + nspid, GetUserId(), COLLPROVIDER_ICU, -1, + collcollate, collcollate, + get_collation_actual_version(COLLPROVIDER_ICU, collcollate), + if_not_exists); + + CreateComments(collid, CollationRelationId, 0, + get_icu_locale_comment(name)); + + /* + * Add keyword variants + */ + status = U_ZERO_ERROR; + en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not get keyword values for locale \"%s\": %s", + name, u_errorName(status)))); + + status = U_ZERO_ERROR; + uenum_reset(en, &status); + while ((val = uenum_next(en, NULL, &status))) + { + char *localeid = psprintf("%s@collation=%s", name, val); + + langtag = get_icu_language_tag(localeid); + collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid; + collid = CollationCreate(psprintf("%s-x-icu", langtag), + nspid, GetUserId(), COLLPROVIDER_ICU, -1, + collcollate, collcollate, + get_collation_actual_version(COLLPROVIDER_ICU, collcollate), + if_not_exists); + CreateComments(collid, CollationRelationId, 0, + get_icu_locale_comment(localeid)); + } + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not get keyword values for locale \"%s\": %s", + name, u_errorName(status)))); + uenum_close(en); + } + } +#endif + PG_RETURN_VOID(); } diff --git a/src/backend/common.mk b/src/backend/common.mk index 5d599dbd0ca..0b57543bc4a 100644 --- a/src/backend/common.mk +++ b/src/backend/common.mk @@ -8,6 +8,8 @@ # this directory and SUBDIRS to subdirectories containing more things # to build. 
+override CPPFLAGS := $(CPPFLAGS) $(ICU_CFLAGS) + ifdef PARTIAL_LINKING # old style: linking using SUBSYS.o subsysfilename = SUBSYS.o diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 93d4eb207f1..93bda427153 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -3046,6 +3046,16 @@ _copyAlterTableCmd(const AlterTableCmd *from) return newnode; } +static AlterCollationStmt * +_copyAlterCollationStmt(const AlterCollationStmt *from) +{ + AlterCollationStmt *newnode = makeNode(AlterCollationStmt); + + COPY_NODE_FIELD(collname); + + return newnode; +} + static AlterDomainStmt * _copyAlterDomainStmt(const AlterDomainStmt *from) { @@ -4986,6 +4996,9 @@ copyObject(const void *from) case T_AlterTableCmd: retval = _copyAlterTableCmd(from); break; + case T_AlterCollationStmt: + retval = _copyAlterCollationStmt(from); + break; case T_AlterDomainStmt: retval = _copyAlterDomainStmt(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 6b40b56f71e..0d12636d92c 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1096,6 +1096,14 @@ _equalAlterTableCmd(const AlterTableCmd *a, const AlterTableCmd *b) } static bool +_equalAlterCollationStmt(const AlterCollationStmt *a, const AlterCollationStmt *b) +{ + COMPARE_NODE_FIELD(collname); + + return true; +} + +static bool _equalAlterDomainStmt(const AlterDomainStmt *a, const AlterDomainStmt *b) { COMPARE_SCALAR_FIELD(subtype); @@ -3174,6 +3182,9 @@ equal(const void *a, const void *b) case T_AlterTableCmd: retval = _equalAlterTableCmd(a, b); break; + case T_AlterCollationStmt: + retval = _equalAlterCollationStmt(a, b); + break; case T_AlterDomainStmt: retval = _equalAlterDomainStmt(a, b); break; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 50126baacf6..82844a0399d 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -244,7 +244,7 @@ static Node 
*makeRecursiveViewSelect(char *relname, List *aliases, Node *query); } %type <node> stmt schema_stmt - AlterEventTrigStmt + AlterEventTrigStmt AlterCollationStmt AlterDatabaseStmt AlterDatabaseSetStmt AlterDomainStmt AlterEnumStmt AlterFdwStmt AlterForeignServerStmt AlterGroupStmt AlterObjectDependsStmt AlterObjectSchemaStmt AlterOwnerStmt @@ -812,6 +812,7 @@ stmtmulti: stmtmulti ';' stmt stmt : AlterEventTrigStmt + | AlterCollationStmt | AlterDatabaseStmt | AlterDatabaseSetStmt | AlterDefaultPrivilegesStmt @@ -9707,6 +9708,21 @@ DropdbStmt: DROP DATABASE database_name /***************************************************************************** * + * ALTER COLLATION + * + *****************************************************************************/ + +AlterCollationStmt: ALTER COLLATION any_name REFRESH VERSION_P + { + AlterCollationStmt *n = makeNode(AlterCollationStmt); + n->collname = $3; + $$ = (Node *)n; + } + ; + + +/***************************************************************************** + * * ALTER SYSTEM * * This is used to change configuration parameters persistently. 
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 0121cbb2ada..4bdcb4fd6ae 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -68,7 +68,8 @@ typedef enum PG_REGEX_LOCALE_WIDE, /* Use <wctype.h> functions */ PG_REGEX_LOCALE_1BYTE, /* Use <ctype.h> functions */ PG_REGEX_LOCALE_WIDE_L, /* Use locale_t <wctype.h> functions */ - PG_REGEX_LOCALE_1BYTE_L /* Use locale_t <ctype.h> functions */ + PG_REGEX_LOCALE_1BYTE_L, /* Use locale_t <ctype.h> functions */ + PG_REGEX_LOCALE_ICU /* Use ICU uchar.h functions */ } PG_Locale_Strategy; static PG_Locale_Strategy pg_regex_strategy; @@ -262,6 +263,11 @@ pg_set_regex_collation(Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } +#ifdef USE_ICU + if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU) + pg_regex_strategy = PG_REGEX_LOCALE_ICU; + else +#endif #ifdef USE_WIDE_UPPER_LOWER if (GetDatabaseEncoding() == PG_UTF8) { @@ -303,13 +309,18 @@ pg_wc_isdigit(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale); + return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale)); + isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_isdigit(c); #endif break; } @@ -336,13 +347,18 @@ pg_wc_isalpha(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale); + return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return 
(c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale)); + isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_isalpha(c); #endif break; } @@ -369,13 +385,18 @@ pg_wc_isalnum(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale); + return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale)); + isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_isalnum(c); #endif break; } @@ -402,13 +423,18 @@ pg_wc_isupper(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale); + return iswupper_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale)); + isupper_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_isupper(c); #endif break; } @@ -435,13 +461,18 @@ pg_wc_islower(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale); + return iswlower_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale)); + islower_l((unsigned char) c, pg_regex_locale->info.lt)); 
+#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_islower(c); #endif break; } @@ -468,13 +499,18 @@ pg_wc_isgraph(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale); + return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, pg_regex_locale)); + isgraph_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_isgraph(c); #endif break; } @@ -501,13 +537,18 @@ pg_wc_isprint(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale); + return iswprint_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale)); + isprint_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_isprint(c); #endif break; } @@ -534,13 +575,18 @@ pg_wc_ispunct(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale); + return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale)); + ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_ispunct(c); #endif break; } @@ -567,13 +613,18 @@ pg_wc_isspace(pg_wchar 
c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale); + return iswspace_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale)); + isspace_l((unsigned char) c, pg_regex_locale->info.lt)); +#endif + break; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_isspace(c); #endif break; } @@ -608,15 +659,20 @@ pg_wc_toupper(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale); + return towupper_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale); + return toupper_l((unsigned char) c, pg_regex_locale->info.lt); #endif return c; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_toupper(c); +#endif + break; } return 0; /* can't get here, but keep compiler quiet */ } @@ -649,15 +705,20 @@ pg_wc_tolower(pg_wchar c) case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale); + return towlower_l((wint_t) c, pg_regex_locale->info.lt); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, pg_regex_locale); + return tolower_l((unsigned char) c, pg_regex_locale->info.lt); #endif return c; + case PG_REGEX_LOCALE_ICU: +#ifdef USE_ICU + return u_tolower(c); +#endif + break; } return 0; /* can't get here, but keep compiler quiet */ } @@ -808,6 +869,9 @@ pg_ctype_get_cache(pg_wc_probefunc 
probefunc, int cclasscode) max_chr = (pg_wchar) MAX_SIMPLE_CHR; #endif break; + case PG_REGEX_LOCALE_ICU: + max_chr = (pg_wchar) MAX_SIMPLE_CHR; + break; default: max_chr = 0; /* can't get here, but keep compiler quiet */ break; diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 20b52734054..c8d20fffeaf 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate, commandCollected = true; break; + case T_AlterCollationStmt: + address = AlterCollation((AlterCollationStmt *) parsetree); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(parsetree)); @@ -2673,6 +2677,10 @@ CreateCommandTag(Node *parsetree) tag = "DROP SUBSCRIPTION"; break; + case T_AlterCollationStmt: + tag = "ALTER COLLATION"; + break; + case T_PrepareStmt: tag = "PREPARE"; break; diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index c16bfbca933..0566abd314d 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -82,6 +82,10 @@ #include <wctype.h> #endif +#ifdef USE_ICU +#include <unicode/ustring.h> +#endif + #include "catalog/pg_collation.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" @@ -1443,6 +1447,42 @@ str_numth(char *dest, char *num, int type) * upper/lower/initcap functions *****************************************************************************/ +#ifdef USE_ICU +static int32_t +icu_convert_case(int32_t (*func)(UChar *, int32_t, const UChar *, int32_t, const char *, UErrorCode *), + pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source) +{ + UErrorCode status; + int32_t len_dest; + + len_dest = len_source; /* try first with same length */ + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status); + if (status == U_BUFFER_OVERFLOW_ERROR) + { 
+ /* try again with adjusted length; replace the caller's buffer (*buff_dest) */ + pfree(*buff_dest); + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status); + } + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("case conversion failed: %s", u_errorName(status)))); + return len_dest; +} + +static int32_t +u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) +{ + return u_strToTitle(dest, destCapacity, src, srcLength, NULL, locale, pErrorCode); +} +#endif + /* * If the system provides the needed functions for wide-character manipulation * (which are all standardized by C99), then we implement upper/lower/initcap @@ -1479,12 +1519,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) result = asc_tolower(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1502,77 +1539,79 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar; + int32_t len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToLower, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) 
palloc((nbytes + 1) * sizeof(wchar_t)); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); - else + if (mylocale) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else #endif - workspace[curr_char] = towlower(workspace[curr_char]); - } + workspace[curr_char] = towlower(workspace[curr_char]); + } - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } #endif /* USE_WIDE_UPPER_LOWER */ - else - { -#ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; -#endif - char *p; - - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) + else { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. 
- */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for lower() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } -#ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); -#endif - } + char *p; - result = pnstrdup(buff, nbytes); + result = pnstrdup(buff, nbytes); - /* - * Note: we assume that tolower_l() will not be so broken as to need - * an isupper_l() guard test. When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = result; *p; p++) - { + /* + * Note: we assume that tolower_l() will not be so broken as to need + * an isupper_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. 
+ */ + for (p = result; *p; p++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale); - else + if (mylocale) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else #endif - *p = pg_tolower((unsigned char) *p); + *p = pg_tolower((unsigned char) *p); + } + } } } @@ -1599,12 +1638,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) result = asc_toupper(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1622,77 +1658,78 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar, len_conv; + UChar *buff_uchar; + UChar *buff_conv; - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToUpper, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { -#ifdef HAVE_LOCALE_T - if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale); - else -#endif - workspace[curr_char] = 
towupper(workspace[curr_char]); - } + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } -#endif /* USE_WIDE_UPPER_LOWER */ - else - { + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; + if (mylocale) + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + else #endif - char *p; + workspace[curr_char] = towupper(workspace[curr_char]); + } - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for upper() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); } -#ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); -#endif - } +#endif /* USE_WIDE_UPPER_LOWER */ + else + { + char *p; - result = pnstrdup(buff, nbytes); + result = pnstrdup(buff, nbytes); - /* - * Note: we assume that toupper_l() will not be so broken as to need - * an islower_l() guard test. 
When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = result; *p; p++) - { + /* + * Note: we assume that toupper_l() will not be so broken as to need + * an islower_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ + for (p = result; *p; p++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale); - else + if (mylocale) + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + else #endif - *p = pg_toupper((unsigned char) *p); + *p = pg_toupper((unsigned char) *p); + } + } } } @@ -1720,12 +1757,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) result = asc_initcap(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1743,100 +1777,101 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); - - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) { -#ifdef HAVE_LOCALE_T - if (mylocale) - { - if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); - else - workspace[curr_char] = 
towupper_l(workspace[curr_char], mylocale); - wasalnum = iswalnum_l(workspace[curr_char], mylocale); - } - else + int32_t len_uchar, len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + } + else #endif + { + if (pg_database_encoding_max_length() > 1) { - if (wasalnum) - workspace[curr_char] = towlower(workspace[curr_char]); - else - workspace[curr_char] = towupper(workspace[curr_char]); - wasalnum = iswalnum(workspace[curr_char]); - } - } + wchar_t *workspace; + size_t curr_char; + size_t result_size; - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } -#endif /* USE_WIDE_UPPER_LOWER */ - else - { -#ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; -#endif - char *p; + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. 
- */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for initcap() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); + if (mylocale) + { + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); + } + else #endif - } + { + if (wasalnum) + workspace[curr_char] = towlower(workspace[curr_char]); + else + workspace[curr_char] = towupper(workspace[curr_char]); + wasalnum = iswalnum(workspace[curr_char]); + } + } - result = pnstrdup(buff, nbytes); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); - /* - * Note: we assume that toupper_l()/tolower_l() will not be so broken - * as to need guard tests. When using the default collation, we apply - * the traditional Postgres behavior that forces ASCII-style treatment - * of I/i, but in non-default collations you get exactly what the - * collation says. 
- */ - for (p = result; *p; p++) - { -#ifdef HAVE_LOCALE_T - if (mylocale) - { - if (wasalnum) - *p = tolower_l((unsigned char) *p, mylocale); - else - *p = toupper_l((unsigned char) *p, mylocale); - wasalnum = isalnum_l((unsigned char) *p, mylocale); + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); } +#endif /* USE_WIDE_UPPER_LOWER */ else -#endif { - if (wasalnum) - *p = pg_tolower((unsigned char) *p); - else - *p = pg_toupper((unsigned char) *p); - wasalnum = isalnum((unsigned char) *p); + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that toupper_l()/tolower_l() will not be so broken + * as to need guard tests. When using the default collation, we apply + * the traditional Postgres behavior that forces ASCII-style treatment + * of I/i, but in non-default collations you get exactly what the + * collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); + } + else +#endif + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + wasalnum = isalnum((unsigned char) *p); + } + } } } } diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 8d9d285fb55..1f683ccd0f7 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -96,7 +96,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) return pg_ascii_tolower(c); #ifdef HAVE_LOCALE_T else if (locale) - return tolower_l(c, locale); + return tolower_l(c, locale->info.lt); #endif else return pg_tolower(c); @@ -165,14 +165,36 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) *p; int slen, plen; + pg_locale_t locale = 0; + bool locale_is_c = false; + + if (lc_ctype_is_c(collation)) + locale_is_c = true; + else if 
(collation != DEFAULT_COLLATION_OID) + { + if (!OidIsValid(collation)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for ILIKE"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + locale = pg_newlocale_from_collation(collation); + } /* * For efficiency reasons, in the single byte case we don't call lower() * on the pattern and text, but instead call SB_lower_char on each - * character. In the multi-byte case we don't have much choice :-( + * character. In the multi-byte case we don't have much choice :-(. + * Also, ICU does not support single-character case folding, so we go the + * long way. */ - if (pg_database_encoding_max_length() > 1) + if (pg_database_encoding_max_length() > 1 || locale->provider == COLLPROVIDER_ICU) { /* lower's result is never packed, so OK to use old macros here */ pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, @@ -190,31 +212,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) } else { - /* - * Here we need to prepare locale information for SB_lower_char. This - * should match the methods used in str_tolower(). - */ - pg_locale_t locale = 0; - bool locale_is_c = false; - - if (lc_ctype_is_c(collation)) - locale_is_c = true; - else if (collation != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collation)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. 
- */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for ILIKE"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } - locale = pg_newlocale_from_collation(collation); - } - p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); s = VARDATA_ANY(str); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index ab197025f81..2a2c9bc5046 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -57,11 +57,17 @@ #include "catalog/pg_collation.h" #include "catalog/pg_control.h" #include "mb/pg_wchar.h" +#include "utils/builtins.h" #include "utils/hsearch.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" +#ifdef USE_ICU +#include <unicode/ucnv.h> +#endif + #ifdef WIN32 /* * This Windows file defines StrNCpy. We don't need it here, so we undefine @@ -1272,12 +1278,13 @@ pg_newlocale_from_collation(Oid collid) if (cache_entry->locale == 0) { /* We haven't computed this yet in this session, so do it */ -#ifdef HAVE_LOCALE_T HeapTuple tp; Form_pg_collation collform; const char *collcollate; - const char *collctype; - locale_t result; + const char *collctype pg_attribute_unused(); + pg_locale_t result; + Datum collversion; + bool isnull; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); if (!HeapTupleIsValid(tp)) @@ -1287,61 +1294,230 @@ pg_newlocale_from_collation(Oid collid) collcollate = NameStr(collform->collcollate); collctype = NameStr(collform->collctype); - if (strcmp(collcollate, collctype) == 0) + result = malloc(sizeof(* result)); + memset(result, 0, sizeof(* result)); + result->provider = collform->collprovider; + + if (collform->collprovider == COLLPROVIDER_LIBC) { - /* Normal case where they're the same */ +#ifdef HAVE_LOCALE_T + locale_t loc; + + if (strcmp(collcollate, collctype) == 0) + { + /* Normal case where they're the same */ #ifndef 
WIN32 - result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, - NULL); + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate, + NULL); #else - result = _create_locale(LC_ALL, collcollate); + loc = _create_locale(LC_ALL, collcollate); #endif - if (!result) - report_newlocale_failure(collcollate); - } - else - { + if (!loc) + report_newlocale_failure(collcollate); + } + else + { #ifndef WIN32 - /* We need two newlocale() steps */ - locale_t loc1; - - loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); - if (!loc1) - report_newlocale_failure(collcollate); - result = newlocale(LC_CTYPE_MASK, collctype, loc1); - if (!result) - report_newlocale_failure(collctype); + /* We need two newlocale() steps */ + locale_t loc1; + + loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL); + if (!loc1) + report_newlocale_failure(collcollate); + loc = newlocale(LC_CTYPE_MASK, collctype, loc1); + if (!loc) + report_newlocale_failure(collctype); #else - /* - * XXX The _create_locale() API doesn't appear to support this. - * Could perhaps be worked around by changing pg_locale_t to - * contain two separate fields. - */ + /* + * XXX The _create_locale() API doesn't appear to support this. + * Could perhaps be worked around by changing pg_locale_t to + * contain two separate fields. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + result->info.lt = loc; +#else /* not HAVE_LOCALE_T */ + /* platform that doesn't support locale_t */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("collations with different collate and ctype values are not supported on this platform"))); -#endif + errmsg("collation provider LIBC is not supported on this platform"))); +#endif /* not HAVE_LOCALE_T */ + } + else if (collform->collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + UCollator *collator; + UErrorCode status; + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + + result->info.icu.locale = strdup(collcollate); + result->info.icu.ucol = collator; +#else /* not USE_ICU */ + /* could get here if a collation was created by a build with ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), \ + errhint("You need to rebuild PostgreSQL using --with-icu."))); +#endif /* not USE_ICU */ } - cache_entry->locale = result; + collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, + &isnull); + if (!isnull) + { + char *actual_versionstr; + char *collversionstr; + + actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate); + if (!actual_versionstr) + /* This could happen when specifying a version in CREATE + * COLLATION for a libc locale, or manually creating a mess + * in the catalogs. 
*/ + ereport(ERROR, + (errmsg("collation \"%s\" has no actual version, but a version was specified", + NameStr(collform->collname)))); + collversionstr = TextDatumGetCString(collversion); + + if (strcmp(actual_versionstr, collversionstr) != 0) + ereport(WARNING, + (errmsg("collation \"%s\" has version mismatch", + NameStr(collform->collname)), + errdetail("The collation in the database was created using version %s, " + "but the operating system provides version %s.", + collversionstr, actual_versionstr), + errhint("Rebuild all objects affected by this collation and run " + "ALTER COLLATION %s REFRESH VERSION, " + "or build PostgreSQL with the right library version.", + quote_qualified_identifier(get_namespace_name(collform->collnamespace), + NameStr(collform->collname))))); + } ReleaseSysCache(tp); -#else /* not HAVE_LOCALE_T */ - /* - * For platforms that don't support locale_t, we can't do anything - * with non-default collations. - */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondefault collations are not supported on this platform"))); -#endif /* not HAVE_LOCALE_T */ + cache_entry->locale = result; } return cache_entry->locale; } +/* + * Get provider-specific collation version string for the given collation from + * the operating system/library. + * + * A particular provider must always either return a non-NULL string or return + * NULL (if it doesn't support versions). It must not return NULL for some + * collcollate and not NULL for others. 
+ */ +char * +get_collation_actual_version(char collprovider, const char *collcollate) +{ + char *collversion; + +#ifdef USE_ICU + if (collprovider == COLLPROVIDER_ICU) + { + UCollator *collator; + UErrorCode status; + UVersionInfo versioninfo; + char buf[U_MAX_VERSION_STRING_LENGTH]; + + status = U_ZERO_ERROR; + collator = ucol_open(collcollate, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + collcollate, u_errorName(status)))); + ucol_getVersion(collator, versioninfo); + ucol_close(collator); + + u_versionToString(versioninfo, buf); + collversion = pstrdup(buf); + } + else +#endif + collversion = NULL; + + return collversion; +} + + +#ifdef USE_ICU +/* + * Converter object for converting between ICU's UChar strings and C strings + * in database encoding. Since the database encoding doesn't change, we only + * need one of these per session. + */ +static UConverter *icu_converter = NULL; + +static void +init_icu_converter(void) +{ + const char *icu_encoding_name; + UErrorCode status; + UConverter *conv; + + if (icu_converter) + return; + + icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding()); + + status = U_ZERO_ERROR; + conv = ucnv_open(icu_encoding_name, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open ICU converter for encoding \"%s\": %s", + icu_encoding_name, u_errorName(status)))); + + icu_converter = conv; +} + +int32_t +icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) +{ + UErrorCode status; + int32_t len_uchar; + + init_icu_converter(); + + len_uchar = 2 * nbytes; /* max length per docs */ + *buff_uchar = palloc(len_uchar * sizeof(**buff_uchar)); + status = U_ZERO_ERROR; + len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_toUChars failed: %s", u_errorName(status)))); + return len_uchar; +} + +int32_t +icu_from_uchar(char **result, UChar 
*buff_uchar, int32_t len_uchar) +{ + UErrorCode status; + int32_t len_result; + + init_icu_converter(); + + len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter)); + *result = palloc(len_result + 1); + status = U_ZERO_ERROR; + ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("ucnv_fromUChars failed: %s", u_errorName(status)))); + return len_result; +} +#endif /* * These functions convert from/to libc's wchar_t, *not* pg_wchar_t. @@ -1362,6 +1538,8 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) { size_t result; + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + if (tolen == 0) return 0; @@ -1398,10 +1576,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) #ifdef HAVE_LOCALE_T #ifdef HAVE_WCSTOMBS_L /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale); + result = wcstombs_l(to, from, tolen, locale->info.lt); #else /* !HAVE_WCSTOMBS_L */ /* We have to temporarily set the locale as current ... ugh */ - locale_t save_locale = uselocale(locale); + locale_t save_locale = uselocale(locale->info.lt); result = wcstombs(to, from, tolen); @@ -1432,6 +1610,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, { size_t result; + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + if (tolen == 0) return 0; @@ -1473,10 +1653,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, #ifdef HAVE_LOCALE_T #ifdef HAVE_MBSTOWCS_L /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale); + result = mbstowcs_l(to, str, tolen, locale->info.lt); #else /* !HAVE_MBSTOWCS_L */ /* We have to temporarily set the locale as current ... 
ugh */ - locale_t save_locale = uselocale(locale); + locale_t save_locale = uselocale(locale->info.lt); result = mbstowcs(to, str, tolen); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index bb9a5446861..f8b28fe0e61 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5259,7 +5259,7 @@ find_join_input_rel(PlannerInfo *root, Relids relids) /* * Check whether char is a letter (and, hence, subject to case-folding) * - * In multibyte character sets, we can't use isalpha, and it does not seem + * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem * worth trying to convert to wchar_t to use iswalpha. Instead, just assume * any multibyte char is potentially case-varying. */ @@ -5271,9 +5271,11 @@ pattern_char_isalpha(char c, bool is_multibyte, return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else if (is_multibyte && IS_HIGHBIT_SET(c)) return true; + else if (locale && locale->provider == COLLPROVIDER_ICU) + return IS_HIGHBIT_SET(c) ? 
true : false; #ifdef HAVE_LOCALE_T - else if (locale) - return isalpha_l((unsigned char) c, locale); + else if (locale && locale->provider == COLLPROVIDER_LIBC) + return isalpha_l((unsigned char) c, locale->info.lt); #endif else return isalpha((unsigned char) c); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index cd036afc004..aa556aa5deb 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -73,9 +73,7 @@ typedef struct hyperLogLogState abbr_card; /* Abbreviated key cardinality state */ hyperLogLogState full_card; /* Full key cardinality state */ double prop_card; /* Required cardinality proportion */ -#ifdef HAVE_LOCALE_T pg_locale_t locale; -#endif } VarStringSortSupport; /* @@ -1403,10 +1401,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) char a2buf[TEXTBUFLEN]; char *a1p, *a2p; - -#ifdef HAVE_LOCALE_T pg_locale_t mylocale = 0; -#endif if (collid != DEFAULT_COLLATION_OID) { @@ -1421,9 +1416,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) errmsg("could not determine which collation to use for string comparison"), errhint("Use the COLLATE clause to set the collation explicitly."))); } -#ifdef HAVE_LOCALE_T mylocale = pg_newlocale_from_collation(collid); -#endif } /* @@ -1542,11 +1535,54 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) memcpy(a2p, arg2, len2); a2p[len2] = '\0'; -#ifdef HAVE_LOCALE_T if (mylocale) - result = strcoll_l(a1p, a2p, mylocale); - else + { + if (mylocale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU +#ifdef HAVE_UCOL_STRCOLLUTF8 + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(mylocale->info.icu.ucol, + arg1, len1, + arg2, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + } + else +#endif + { + int32_t ulen1, ulen2; + UChar *uchar1, *uchar2; + + ulen1 = 
icu_to_uchar(&uchar1, arg1, len1); + ulen2 = icu_to_uchar(&uchar2, arg2, len2); + + result = ucol_strcoll(mylocale->info.icu.ucol, + uchar1, ulen1, + uchar2, ulen2); + } +#else /* not USE_ICU */ + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); +#endif /* not USE_ICU */ + } + else + { +#ifdef HAVE_LOCALE_T + result = strcoll_l(a1p, a2p, mylocale->info.lt); +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); #endif + } + } + else result = strcoll(a1p, a2p); /* @@ -1768,10 +1804,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) bool abbreviate = ssup->abbreviate; bool collate_c = false; VarStringSortSupport *sss; - -#ifdef HAVE_LOCALE_T pg_locale_t locale = 0; -#endif /* * If possible, set ssup->comparator to a function which can be used to @@ -1826,9 +1859,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) errmsg("could not determine which collation to use for string comparison"), errhint("Use the COLLATE clause to set the collation explicitly."))); } -#ifdef HAVE_LOCALE_T locale = pg_newlocale_from_collation(collid); -#endif } } @@ -1854,7 +1885,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) * platforms. 
*/ #ifndef TRUST_STRXFRM - if (!collate_c) + if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU)) abbreviate = false; #endif @@ -1877,9 +1908,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) sss->last_len2 = -1; /* Initialize */ sss->last_returned = 0; -#ifdef HAVE_LOCALE_T sss->locale = locale; -#endif /* * To avoid somehow confusing a strxfrm() blob and an original string, @@ -2090,11 +2119,54 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup) goto done; } -#ifdef HAVE_LOCALE_T if (sss->locale) - result = strcoll_l(sss->buf1, sss->buf2, sss->locale); - else + { + if (sss->locale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU +#ifdef HAVE_UCOL_STRCOLLUTF8 + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(sss->locale->info.icu.ucol, + a1p, len1, + a2p, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + } + else #endif + { + int32_t ulen1, ulen2; + UChar *uchar1, *uchar2; + + ulen1 = icu_to_uchar(&uchar1, a1p, len1); + ulen2 = icu_to_uchar(&uchar2, a2p, len2); + + result = ucol_strcoll(sss->locale->info.icu.ucol, + uchar1, ulen1, + uchar2, ulen2); + } +#else /* not USE_ICU */ + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", sss->locale->provider); +#endif /* not USE_ICU */ + } + else + { +#ifdef HAVE_LOCALE_T + result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt); +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", sss->locale->provider); +#endif + } + } + else result = strcoll(sss->buf1, sss->buf2); /* @@ -2200,9 +2272,14 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) else { Size bsize; +#ifdef USE_ICU + int32_t ulen = -1; + UChar *uchar; +#endif /* - * We're not using the C collation, so fall back on strxfrm. + * We're not using the C collation, so fall back on strxfrm or ICU + * analogs. 
*/ /* By convention, we use buffer 1 to store and NUL-terminate */ @@ -2222,17 +2299,66 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) goto done; } - /* Just like strcoll(), strxfrm() expects a NUL-terminated string */ memcpy(sss->buf1, authoritative_data, len); + /* Just like strcoll(), strxfrm() expects a NUL-terminated string. + * Not necessary for ICU, but doesn't hurt. */ sss->buf1[len] = '\0'; sss->last_len1 = len; +#ifdef USE_ICU + /* When using ICU and not UTF8, convert string to UChar. */ + if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_UTF8) + ulen = icu_to_uchar(&uchar, sss->buf1, len); +#endif + + /* + * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer, + * and try again. Both of these functions have the result buffer + * content undefined if the result did not fit, so we need to retry + * until everything fits, even though we only need the first few bytes + * in the end. When using ucol_nextSortKeyPart(), however, we only + * ask for as many bytes as we actually need. + */ for (;;) { +#ifdef USE_ICU + if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU) + { + /* + * When using UTF8, use the iteration interface so we only + * need to produce as many bytes as we actually need. 
+ */ + if (GetDatabaseEncoding() == PG_UTF8) + { + UCharIterator iter; + uint32_t state[2]; + UErrorCode status; + + uiter_setUTF8(&iter, sss->buf1, len); + state[0] = state[1] = 0; /* won't need that again */ + status = U_ZERO_ERROR; + bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol, + &iter, + state, + (uint8_t *) sss->buf2, + Min(sizeof(Datum), sss->buflen2), + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("sort key generation failed: %s", u_errorName(status)))); + } + else + bsize = ucol_getSortKey(sss->locale->info.icu.ucol, + uchar, ulen, + (uint8_t *) sss->buf2, sss->buflen2); + } + else +#endif #ifdef HAVE_LOCALE_T - if (sss->locale) + if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC) bsize = strxfrm_l(sss->buf2, sss->buf1, - sss->buflen2, sss->locale); + sss->buflen2, sss->locale->info.lt); else #endif bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2); @@ -2242,8 +2368,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) break; /* - * The C standard states that the contents of the buffer is now - * unspecified. Grow buffer, and retry. + * Grow buffer and retry. */ pfree(sss->buf2); sss->buflen2 = Max(bsize + 1, diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c index 11099b844f4..444eec25b50 100644 --- a/src/backend/utils/mb/encnames.c +++ b/src/backend/utils/mb/encnames.c @@ -403,6 +403,82 @@ const pg_enc2gettext pg_enc2gettext_tbl[] = }; +#ifndef FRONTEND + +/* + * Table of encoding names for ICU + * + * Reference: <https://ssl.icu-project.org/icu-bin/convexp> + * + * NULL entries are not supported by ICU, or their mapping is unclear. 
+ */ +static const char * const pg_enc2icu_tbl[] = +{ + NULL, /* PG_SQL_ASCII */ + "EUC-JP", /* PG_EUC_JP */ + "EUC-CN", /* PG_EUC_CN */ + "EUC-KR", /* PG_EUC_KR */ + "EUC-TW", /* PG_EUC_TW */ + NULL, /* PG_EUC_JIS_2004 */ + "UTF-8", /* PG_UTF8 */ + NULL, /* PG_MULE_INTERNAL */ + "ISO-8859-1", /* PG_LATIN1 */ + "ISO-8859-2", /* PG_LATIN2 */ + "ISO-8859-3", /* PG_LATIN3 */ + "ISO-8859-4", /* PG_LATIN4 */ + "ISO-8859-9", /* PG_LATIN5 */ + "ISO-8859-10", /* PG_LATIN6 */ + "ISO-8859-13", /* PG_LATIN7 */ + "ISO-8859-14", /* PG_LATIN8 */ + "ISO-8859-15", /* PG_LATIN9 */ + NULL, /* PG_LATIN10 */ + "CP1256", /* PG_WIN1256 */ + "CP1258", /* PG_WIN1258 */ + "CP866", /* PG_WIN866 */ + NULL, /* PG_WIN874 */ + "KOI8-R", /* PG_KOI8R */ + "CP1251", /* PG_WIN1251 */ + "CP1252", /* PG_WIN1252 */ + "ISO-8859-5", /* PG_ISO_8859_5 */ + "ISO-8859-6", /* PG_ISO_8859_6 */ + "ISO-8859-7", /* PG_ISO_8859_7 */ + "ISO-8859-8", /* PG_ISO_8859_8 */ + "CP1250", /* PG_WIN1250 */ + "CP1253", /* PG_WIN1253 */ + "CP1254", /* PG_WIN1254 */ + "CP1255", /* PG_WIN1255 */ + "CP1257", /* PG_WIN1257 */ + "KOI8-U", /* PG_KOI8U */ +}; + +bool +is_encoding_supported_by_icu(int encoding) +{ + return (pg_enc2icu_tbl[encoding] != NULL); +} + +const char * +get_encoding_name_for_icu(int encoding) +{ + const char *icu_encoding_name; + + StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1, + "pg_enc2icu_tbl incomplete"); + + icu_encoding_name = pg_enc2icu_tbl[encoding]; + + if (!icu_encoding_name) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("encoding \"%s\" not supported by ICU", + pg_encoding_to_char(encoding)))); + + return icu_encoding_name; +} + +#endif /* not FRONTEND */ + + /* ---------- * Encoding checks, for error returns -1 else encoding id * ---------- diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index e0c72fbb800..8dde1e8f9d4 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -62,6 +62,7 @@ #include "catalog/catalog.h" 
#include "catalog/pg_authid.h" #include "catalog/pg_class.h" +#include "catalog/pg_collation.h" #include "common/file_utils.h" #include "common/restricted_token.h" #include "common/username.h" @@ -1629,7 +1630,7 @@ setup_collation(FILE *cmdfd) PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n"); /* Add an SQL-standard name */ - PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, PG_UTF8); + PG_CMD_PRINTF3("INSERT INTO pg_collation (collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); } /* diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index a98747d89a4..b3d95d7f6ee 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -12834,8 +12834,10 @@ dumpCollation(Archive *fout, CollInfo *collinfo) PQExpBuffer delq; PQExpBuffer labelq; PGresult *res; + int i_collprovider; int i_collcollate; int i_collctype; + const char *collprovider; const char *collcollate; const char *collctype; @@ -12852,18 +12854,32 @@ dumpCollation(Archive *fout, CollInfo *collinfo) selectSourceSchema(fout, collinfo->dobj.namespace->dobj.name); /* Get collation-specific details */ - appendPQExpBuffer(query, "SELECT " - "collcollate, " - "collctype " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + if (fout->remoteVersion >= 100000) + appendPQExpBuffer(query, "SELECT " + "collprovider, " + "collcollate, " + "collctype, " + "collversion " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); + else + appendPQExpBuffer(query, "SELECT " + "'p'::char AS collprovider, " + "collcollate, " + 
"collctype, " + "NULL AS collversion " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); res = ExecuteSqlQueryForSingleRow(fout, query->data); + i_collprovider = PQfnumber(res, "collprovider"); i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); + collprovider = PQgetvalue(res, 0, i_collprovider); collcollate = PQgetvalue(res, 0, i_collcollate); collctype = PQgetvalue(res, 0, i_collctype); @@ -12875,11 +12891,50 @@ dumpCollation(Archive *fout, CollInfo *collinfo) appendPQExpBuffer(delq, ".%s;\n", fmtId(collinfo->dobj.name)); - appendPQExpBuffer(q, "CREATE COLLATION %s (lc_collate = ", + appendPQExpBuffer(q, "CREATE COLLATION %s (", fmtId(collinfo->dobj.name)); - appendStringLiteralAH(q, collcollate, fout); - appendPQExpBufferStr(q, ", lc_ctype = "); - appendStringLiteralAH(q, collctype, fout); + + appendPQExpBufferStr(q, "provider = "); + if (collprovider[0] == 'c') + appendPQExpBufferStr(q, "libc"); + else if (collprovider[0] == 'i') + appendPQExpBufferStr(q, "icu"); + else + exit_horribly(NULL, + "unrecognized collation provider: %s\n", + collprovider); + + if (strcmp(collcollate, collctype) == 0) + { + appendPQExpBufferStr(q, ", locale = "); + appendStringLiteralAH(q, collcollate, fout); + } + else + { + appendPQExpBufferStr(q, ", lc_collate = "); + appendStringLiteralAH(q, collcollate, fout); + appendPQExpBufferStr(q, ", lc_ctype = "); + appendStringLiteralAH(q, collctype, fout); + } + + /* + * For binary upgrade, carry over the collation version. For normal + * dump/restore, omit the version, so that it is computed upon restore. 
+ */ + if (dopt->binary_upgrade) + { + int i_collversion; + + i_collversion = PQfnumber(res, "collversion"); + if (!PQgetisnull(res, 0, i_collversion)) + { + appendPQExpBufferStr(q, ", version = "); + appendStringLiteralAH(q, + PQgetvalue(res, 0, i_collversion), + fout); + } + } + appendPQExpBufferStr(q, ");\n"); appendPQExpBuffer(labelq, "COLLATION %s", fmtId(collinfo->dobj.name)); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 021f4bf081a..366737440ce 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -2424,7 +2424,7 @@ qr/^\QINSERT INTO test_fifth_table (col1, col2, col3, col4, col5) VALUES (NULL, 'CREATE COLLATION test0 FROM "C";', regexp => qr/^ - \QCREATE COLLATION test0 (lc_collate = 'C', lc_ctype = 'C');\E/xm, + \QCREATE COLLATION test0 (provider = libc, locale = 'C');\E/xm, collation => 1, like => { binary_upgrade => 1, diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 61a3e2a8483..8c583127fdd 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -3738,7 +3738,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false}; + static const bool translate_columns[] = {false, false, false, false, false, false}; if (pset.sversion < 90100) { @@ -3762,6 +3762,11 @@ listCollations(const char *pattern, bool verbose, bool showSystem) gettext_noop("Collate"), gettext_noop("Ctype")); + if (pset.sversion >= 100000) + appendPQExpBuffer(&buf, + ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", + gettext_noop("Provider")); + if (verbose) appendPQExpBuffer(&buf, ",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"", diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 30c87e004ec..8edd8aa0662 100644 --- 
a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -34,9 +34,13 @@ CATALOG(pg_collation,3456) NameData collname; /* collation name */ Oid collnamespace; /* OID of namespace containing collation */ Oid collowner; /* owner of collation */ + char collprovider; /* see constants below */ int32 collencoding; /* encoding for this collation; -1 = "all" */ NameData collcollate; /* LC_COLLATE setting */ NameData collctype; /* LC_CTYPE setting */ +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + text collversion; /* provider-dependent version of collation data */ +#endif } FormData_pg_collation; /* ---------------- @@ -50,27 +54,34 @@ typedef FormData_pg_collation *Form_pg_collation; * compiler constants for pg_collation * ---------------- */ -#define Natts_pg_collation 6 +#define Natts_pg_collation 8 #define Anum_pg_collation_collname 1 #define Anum_pg_collation_collnamespace 2 #define Anum_pg_collation_collowner 3 -#define Anum_pg_collation_collencoding 4 -#define Anum_pg_collation_collcollate 5 -#define Anum_pg_collation_collctype 6 +#define Anum_pg_collation_collprovider 4 +#define Anum_pg_collation_collencoding 5 +#define Anum_pg_collation_collcollate 6 +#define Anum_pg_collation_collctype 7 +#define Anum_pg_collation_collversion 8 /* ---------------- * initial contents of pg_collation * ---------------- */ -DATA(insert OID = 100 ( default PGNSP PGUID -1 "" "" )); +DATA(insert OID = 100 ( default PGNSP PGUID d -1 "" "" 0 )); DESCR("database's default collation"); #define DEFAULT_COLLATION_OID 100 -DATA(insert OID = 950 ( C PGNSP PGUID -1 "C" "C" )); +DATA(insert OID = 950 ( C PGNSP PGUID c -1 "C" "C" 0 )); DESCR("standard C collation"); #define C_COLLATION_OID 950 -DATA(insert OID = 951 ( POSIX PGNSP PGUID -1 "POSIX" "POSIX" )); +DATA(insert OID = 951 ( POSIX PGNSP PGUID c -1 "POSIX" "POSIX" 0 )); DESCR("standard POSIX collation"); #define POSIX_COLLATION_OID 951 + +#define COLLPROVIDER_DEFAULT 'd' +#define COLLPROVIDER_ICU 
'i' +#define COLLPROVIDER_LIBC 'c' + #endif /* PG_COLLATION_H */ diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h index 482ba7920e5..dfebdbaa0bb 100644 --- a/src/include/catalog/pg_collation_fn.h +++ b/src/include/catalog/pg_collation_fn.h @@ -16,8 +16,10 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, + char collprovider, int32 collencoding, const char *collcollate, const char *collctype, + const char *collversion, bool if_not_exists); extern void RemoveCollationById(Oid collationOid); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index a5b415346b7..0d18ab8c0dc 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5401,6 +5401,9 @@ DESCR("pg_controldata init state information as a function"); DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); DESCR("import collations from operating system"); +DATA(insert OID = 3448 ( pg_collation_actual_version PGNSP PGUID 12 100 0 0 0 f f f f t f v s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_collation_actual_version _null_ _null_ _null_ )); +DESCR("import collations from operating system"); + /* system management/monitoring related functions */ DATA(insert OID = 3353 ( pg_ls_logdir PGNSP PGUID 12 10 20 0 0 f f f f t t v s 0 0 2249 "" "{25,20,1184}" "{o,o,o}" "{name,size,modification}" _null_ _null_ pg_ls_logdir _null_ _null_ _null_ )); DESCR("list files in the log directory"); diff --git a/src/include/commands/collationcmds.h b/src/include/commands/collationcmds.h index 3b2fcb82711..df5623ccb6d 100644 --- a/src/include/commands/collationcmds.h +++ b/src/include/commands/collationcmds.h @@ -20,5 +20,6 @@ extern ObjectAddress DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists); extern void 
IsThereCollationInNamespace(const char *collname, Oid nspOid); +extern ObjectAddress AlterCollation(AlterCollationStmt *stmt); #endif /* COLLATIONCMDS_H */ diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 5f546973028..9c5e749c9e7 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -333,6 +333,12 @@ typedef struct pg_enc2gettext extern const pg_enc2gettext pg_enc2gettext_tbl[]; /* + * Encoding names for ICU + */ +extern bool is_encoding_supported_by_icu(int encoding); +extern const char *get_encoding_name_for_icu(int encoding); + +/* * pg_wchar stuff */ typedef int (*mb2wchar_with_len_converter) (const unsigned char *from, diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 9a4221a9e7b..b2d8514f895 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -424,6 +424,7 @@ typedef enum NodeTag T_CreateSubscriptionStmt, T_AlterSubscriptionStmt, T_DropSubscriptionStmt, + T_AlterCollationStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 582e0e0ebe9..f3773ca9294 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1733,6 +1733,17 @@ typedef struct AlterTableCmd /* one subcommand of an ALTER TABLE */ /* ---------------------- + * Alter Collation + * ---------------------- + */ +typedef struct AlterCollationStmt +{ + NodeTag type; + List *collname; +} AlterCollationStmt; + + +/* ---------------------- * Alter Domain * * The fields are used in different ways by the different variants of diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 6a8176b323e..e1c1c9e9b47 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -606,6 +606,9 @@ /* Define to 1 if you have the external array `tzname'. */ #undef HAVE_TZNAME +/* Define to 1 if you have the `ucol_strcollUTF8' function. 
*/ +#undef HAVE_UCOL_STRCOLLUTF8 + /* Define to 1 if you have the <ucred.h> header file. */ #undef HAVE_UCRED_H @@ -819,6 +822,9 @@ (--enable-float8-byval) */ #undef USE_FLOAT8_BYVAL +/* Define to build with ICU support. (--with-icu) */ +#undef USE_ICU + /* Define to 1 to build with LDAP support. (--with-ldap) */ #undef USE_LDAP diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index cb509e2b6b0..12d75474138 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -15,6 +15,9 @@ #if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE) #include <xlocale.h> #endif +#ifdef USE_ICU +#include <unicode/ucol.h> +#endif #include "utils/guc.h" @@ -61,17 +64,36 @@ extern void cache_locale_time(void); * We define our own wrapper around locale_t so we can keep the same * function signatures for all builds, while not having to create a * fake version of the standard type locale_t in the global namespace. - * The fake version of pg_locale_t can be checked for truth; that's - * about all it will be needed for. + * pg_locale_t is occasionally checked for truth, so make it a pointer. 
*/ +struct pg_locale_t +{ + char provider; + union + { #ifdef HAVE_LOCALE_T -typedef locale_t pg_locale_t; -#else -typedef int pg_locale_t; + locale_t lt; +#endif +#ifdef USE_ICU + struct { + const char *locale; + UCollator *ucol; + } icu; #endif + } info; +}; + +typedef struct pg_locale_t *pg_locale_t; extern pg_locale_t pg_newlocale_from_collation(Oid collid); +extern char *get_collation_actual_version(char collprovider, const char *collcollate); + +#ifdef USE_ICU +extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); +extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar); +#endif + /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */ #ifdef USE_WIDE_UPPER_LOWER extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, diff --git a/src/test/regress/GNUmakefile b/src/test/regress/GNUmakefile index b923ea14203..a747facb9af 100644 --- a/src/test/regress/GNUmakefile +++ b/src/test/regress/GNUmakefile @@ -125,6 +125,9 @@ tablespace-setup: ## REGRESS_OPTS = --dlpath=. $(EXTRA_REGRESS_OPTS) +ifeq ($(with_icu),yes) +override EXTRA_TESTS := collate.icu $(EXTRA_TESTS) +endif check: all tablespace-setup $(pg_regress_check) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(MAXCONNOPT) $(EXTRA_TESTS) diff --git a/src/test/regress/expected/collate.icu.out b/src/test/regress/expected/collate.icu.out new file mode 100644 index 00000000000..e1fc9984f2f --- /dev/null +++ b/src/test/regress/expected/collate.icu.out @@ -0,0 +1,1126 @@ +/* + * This test is for ICU collations. 
+ */ +SET client_encoding TO UTF8; +CREATE SCHEMA collate_tests; +SET search_path = collate_tests; +CREATE TABLE collate_test1 ( + a int, + b text COLLATE "en-x-icu" NOT NULL +); +\d collate_test1 + Table "collate_tests.collate_test1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | en-x-icu | not null | + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "ja_JP.eucjp-x-icu" +); +ERROR: collation "ja_JP.eucjp-x-icu" for encoding "UTF8" does not exist +LINE 3: b text COLLATE "ja_JP.eucjp-x-icu" + ^ +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "foo-x-icu" +); +ERROR: collation "foo-x-icu" for encoding "UTF8" does not exist +LINE 3: b text COLLATE "foo-x-icu" + ^ +CREATE TABLE collate_test_fail ( + a int COLLATE "en-x-icu", + b text +); +ERROR: collations are not supported by type integer +LINE 2: a int COLLATE "en-x-icu", + ^ +CREATE TABLE collate_test_like ( + LIKE collate_test1 +); +\d collate_test_like + Table "collate_tests.collate_test_like" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | en-x-icu | not null | + +CREATE TABLE collate_test2 ( + a int, + b text COLLATE "sv-x-icu" +); +CREATE TABLE collate_test3 ( + a int, + b text COLLATE "C" +); +INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC'); +INSERT INTO collate_test2 SELECT * FROM collate_test1; +INSERT INTO collate_test3 SELECT * FROM collate_test1; +SELECT * FROM collate_test1 WHERE b >= 'bbc'; + a | b +---+----- + 3 | bbc +(1 row) + +SELECT * FROM collate_test2 WHERE b >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test3 WHERE b >= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test3 WHERE b >= 'BBC'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" 
>= 'bbc'; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C"; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; + a | b +---+----- + 2 | äbc + 3 | bbc +(2 rows) + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en-x-icu"; +ERROR: collation mismatch between explicit collations "C" and "en-x-icu" +LINE 1: ...* FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "e... + ^ +CREATE DOMAIN testdomain_sv AS text COLLATE "sv-x-icu"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv-x-icu"; -- fails +ERROR: collations are not supported by type integer +CREATE TABLE collate_test4 ( + a int, + b testdomain_sv +); +INSERT INTO collate_test4 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test4 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +CREATE TABLE collate_test5 ( + a int, + b testdomain_sv COLLATE "en-x-icu" +); +INSERT INTO collate_test5 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test5 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b FROM collate_test1 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b FROM collate_test2 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test3 ORDER BY b; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- star expansion +SELECT * FROM collate_test1 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT * FROM collate_test2 ORDER BY b; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT * FROM collate_test3 ORDER BY b; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc 
+(4 rows) + +-- constant expression folding +SELECT 'bbc' COLLATE "en-x-icu" > 'äbc' COLLATE "en-x-icu" AS "true"; + true +------ + t +(1 row) + +SELECT 'bbc' COLLATE "sv-x-icu" > 'äbc' COLLATE "sv-x-icu" AS "false"; + false +------- + f +(1 row) + +-- upper/lower +CREATE TABLE collate_test10 ( + a int, + x text COLLATE "en-x-icu", + y text COLLATE "tr-x-icu" +); +INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ'); +SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10; + a | lower | lower | upper | upper | initcap | initcap +---+-------+-------+-------+-------+---------+--------- + 1 | hij | hij | HIJ | HİJ | Hij | Hij + 2 | hij | hıj | HIJ | HIJ | Hij | Hıj +(2 rows) + +SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hij +(2 rows) + +SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a; + a | x | y +---+-----+----- + 2 | HIJ | HIJ + 1 | hij | hij +(2 rows) + +-- LIKE/ILIKE +SELECT * FROM collate_test1 WHERE b LIKE 'abc'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b LIKE 'abc%'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b LIKE '%bc%'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE 'abc'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE 'abc%'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(4 rows) + +SELECT 'Türkiye' COLLATE "en-x-icu" ILIKE '%KI%' AS "true"; + true +------ + t +(1 row) + +SELECT 'Türkiye' COLLATE "tr-x-icu" ILIKE '%KI%' AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ILIKE 'BIT' COLLATE "en-x-icu" AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ILIKE 'BIT' COLLATE "tr-x-icu" AS "true"; + true 
+------ + t +(1 row) + +-- The following actually exercises the selectivity estimation for ILIKE. +SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; + relname +--------- +(0 rows) + +-- regular expressions +SELECT * FROM collate_test1 WHERE b ~ '^abc$'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b ~ '^abc'; + a | b +---+----- + 1 | abc +(1 row) + +SELECT * FROM collate_test1 WHERE b ~ 'bc'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc +(3 rows) + +SELECT * FROM collate_test1 WHERE b ~* '^abc$'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ~* '^abc'; + a | b +---+----- + 1 | abc + 4 | ABC +(2 rows) + +SELECT * FROM collate_test1 WHERE b ~* 'bc'; + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(4 rows) + +CREATE TABLE collate_test6 ( + a int, + b text COLLATE "en-x-icu" +); +INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), + (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), + (9, 'äbç'), (10, 'ÄBÇ'); +SELECT b, + b ~ '^[[:alpha:]]+$' AS is_alpha, + b ~ '^[[:upper:]]+$' AS is_upper, + b ~ '^[[:lower:]]+$' AS is_lower, + b ~ '^[[:digit:]]+$' AS is_digit, + b ~ '^[[:alnum:]]+$' AS is_alnum, + b ~ '^[[:graph:]]+$' AS is_graph, + b ~ '^[[:print:]]+$' AS is_print, + b ~ '^[[:punct:]]+$' AS is_punct, + b ~ '^[[:space:]]+$' AS is_space +FROM collate_test6; + b | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space +-----+----------+----------+----------+----------+----------+----------+----------+----------+---------- + abc | t | f | t | f | t | t | t | f | f + ABC | t | t | f | f | t | t | t | f | f + 123 | f | f | f | t | t | t | t | f | f + ab1 | f | f | f | f | t | t | t | f | f + a1! 
| f | f | f | f | f | t | t | f | f + a c | f | f | f | f | f | f | t | f | f + !.; | f | f | f | f | f | t | t | t | f + | f | f | f | f | f | f | t | f | t + äbç | t | f | t | f | t | t | t | f | f + ÄBÇ | t | t | f | f | t | t | t | f | f +(10 rows) + +SELECT 'Türkiye' COLLATE "en-x-icu" ~* 'KI' AS "true"; + true +------ + t +(1 row) + +SELECT 'Türkiye' COLLATE "tr-x-icu" ~* 'KI' AS "true"; -- true with ICU + true +------ + t +(1 row) + +SELECT 'bıt' ~* 'BIT' COLLATE "en-x-icu" AS "false"; + false +------- + f +(1 row) + +SELECT 'bıt' ~* 'BIT' COLLATE "tr-x-icu" AS "false"; -- false with ICU + false +------- + f +(1 row) + +-- The following actually exercises the selectivity estimation for ~*. +SELECT relname FROM pg_class WHERE relname ~* '^abc'; + relname +--------- +(0 rows) + +/* not run by default because it requires tr_TR system locale +-- to_char + +SET lc_time TO 'tr_TR'; +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr-x-icu"); +*/ +-- backwards parsing +CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; +CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10; +SELECT table_name, view_definition FROM information_schema.views + WHERE table_name LIKE 'collview%' ORDER BY 1; + table_name | view_definition +------------+-------------------------------------------------------------------------- + collview1 | SELECT collate_test1.a, + + | collate_test1.b + + | FROM collate_test1 + + | WHERE ((collate_test1.b COLLATE "C") >= 'bbc'::text); + collview2 | SELECT collate_test1.a, + + | collate_test1.b + + | FROM collate_test1 + + | ORDER BY (collate_test1.b COLLATE "C"); + collview3 | SELECT collate_test10.a, + + | lower(((collate_test10.x || collate_test10.x) COLLATE "C")) AS lower+ + | FROM collate_test10; +(3 rows) + +-- collation propagation in various 
expression types +SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2; + a | coalesce +---+---------- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2; + a | coalesce +---+---------- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2; + a | coalesce +---+---------- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hıj +(2 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3; + a | b | greatest +---+-----+---------- + 1 | abc | CCC + 2 | äbc | CCC + 3 | bbc | CCC + 4 | ABC | CCC +(4 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3; + a | b | greatest +---+-----+---------- + 1 | abc | CCC + 3 | bbc | CCC + 4 | ABC | CCC + 2 | äbc | äbc +(4 rows) + +SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3; + a | b | greatest +---+-----+---------- + 4 | ABC | CCC + 1 | abc | abc + 3 | bbc | bbc + 2 | äbc | äbc +(4 rows) + +SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10; + a | x | y | lower | lower +---+-----+-----+-------+------- + 1 | hij | hij | hij | hij + 2 | HIJ | HIJ | hij | hıj +(2 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 2 | äbc + 3 | bbc + 1 | +(4 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 3 | bbc + 2 | äbc + 1 | +(4 rows) + +SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2; + a | nullif +---+-------- + 4 | ABC + 3 | bbc + 2 | äbc + 1 | +(4 rows) + +SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hıj +(2 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b 
END FROM collate_test1 ORDER BY 2; + a | b +---+------ + 4 | ABC + 2 | äbc + 1 | abcd + 3 | bbc +(4 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2; + a | b +---+------ + 4 | ABC + 1 | abcd + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2; + a | b +---+------ + 4 | ABC + 1 | abcd + 3 | bbc + 2 | äbc +(4 rows) + +CREATE DOMAIN testdomain AS text; +SELECT a, b::testdomain FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, b::testdomain FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b::testdomain FROM collate_test3 ORDER BY 2; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10; + a | lower | lower +---+-------+------- + 1 | hij | hij + 2 | hij | hıj +(2 rows) + +SELECT min(b), max(b) FROM collate_test1; + min | max +-----+----- + abc | bbc +(1 row) + +SELECT min(b), max(b) FROM collate_test2; + min | max +-----+----- + abc | äbc +(1 row) + +SELECT min(b), max(b) FROM collate_test3; + min | max +-----+----- + ABC | äbc +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test1; + array_agg +------------------- + {abc,ABC,äbc,bbc} +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test2; + array_agg +------------------- + {abc,ABC,bbc,äbc} +(1 row) + +SELECT array_agg(b ORDER BY b) FROM collate_test3; + array_agg +------------------- + {ABC,abc,bbc,äbc} +(1 row) + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 1 | abc + 4 | ABC + 4 | ABC + 2 | äbc + 2 | äbc + 3 | bbc + 3 | bbc +(8 rows) + +SELECT a, b FROM collate_test2 UNION SELECT a, b FROM 
collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2; + a | b +---+----- + 3 | bbc + 2 | äbc +(2 rows) + +SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2; + a | b +---+----- + 4 | ABC + 3 | bbc + 2 | äbc +(3 rows) + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok + a | b +---+----- + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC + 1 | abc + 2 | äbc + 3 | bbc + 4 | ABC +(8 rows) + +SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en-x-icu" and "C" +LINE 1: SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collat... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en-x-icu" and "C" +LINE 1: ...ELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM col... + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +ERROR: collation mismatch between implicit collations "en-x-icu" and "C" +LINE 1: SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM colla... 
+ ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail +ERROR: no collation was derived for column "b" with collatable type text +HINT: Use the COLLATE clause to set the collation explicitly. +-- ideally this would be a parse-time error, but for now it must be run-time: +select x < y from collate_test10; -- fail +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. +select x || y from collate_test10; -- ok, because || is not collation aware + ?column? +---------- + hijhij + HIJHIJ +(2 rows) + +select x, y from collate_test10 order by x || y; -- not so ok +ERROR: collation mismatch between implicit collations "en-x-icu" and "tr-x-icu" +LINE 1: select x, y from collate_test10 order by x || y; + ^ +HINT: You can choose the collation by applying the COLLATE clause to one or both expressions. +-- collation mismatch between recursive and non-recursive term +WITH RECURSIVE foo(x) AS + (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x) + UNION ALL + SELECT (x || 'c') COLLATE "de-x-icu" FROM foo WHERE length(x) < 10) +SELECT * FROM foo; +ERROR: recursive query "foo" column 1 has collation "en-x-icu" in non-recursive term but collation "de-x-icu" overall +LINE 2: (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x... + ^ +HINT: Use the COLLATE clause to set the collation of the non-recursive term. 
+-- casting +SELECT CAST('42' AS text COLLATE "C"); +ERROR: syntax error at or near "COLLATE" +LINE 1: SELECT CAST('42' AS text COLLATE "C"); + ^ +SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2; + a | b +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2; + a | b +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- propagation of collation in SQL functions (inlined and non-inlined cases) +-- and plpgsql functions too +CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 $$; +CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 limit 1 $$; +CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql + AS $$ begin return $1 < $2; end $$; +SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, + mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + a | b | lt | mylt | mylt_noninline | mylt_plpgsql +-----+-----+----+------+----------------+-------------- + abc | abc | f | f | f | f + abc | ABC | t | t | t | t + abc | äbc | t | t | t | t + abc | bbc | t | t | t | t + ABC | abc | f | f | f | f + ABC | ABC | f | f | f | f + ABC | äbc | t | t | t | t + ABC | bbc | t | t | t | t + äbc | abc | f | f | f | f + äbc | ABC | f | f | f | f + äbc | äbc | f | f | f | f + äbc | bbc | t | t | t | t + bbc | abc | f | f | f | f + bbc | ABC | f | f | f | f + bbc | äbc | f | f | f | f + bbc | bbc | f | f | f | f +(16 rows) + +SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt, + mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"), + mylt_plpgsql(a.b, b.b COLLATE "C") +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + a | b | lt | mylt | mylt_noninline | mylt_plpgsql 
+-----+-----+----+------+----------------+-------------- + abc | abc | f | f | f | f + abc | ABC | f | f | f | f + abc | äbc | t | t | t | t + abc | bbc | t | t | t | t + ABC | abc | t | t | t | t + ABC | ABC | f | f | f | f + ABC | äbc | t | t | t | t + ABC | bbc | t | t | t | t + äbc | abc | f | f | f | f + äbc | ABC | f | f | f | f + äbc | äbc | f | f | f | f + äbc | bbc | f | f | f | f + bbc | abc | f | f | f | f + bbc | ABC | f | f | f | f + bbc | äbc | t | t | t | t + bbc | bbc | f | f | f | f +(16 rows) + +-- collation override in plpgsql +CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text := x; + yy text := y; +begin + return xx < yy; +end +$$; +SELECT mylt2('a', 'B' collate "en-x-icu") as t, mylt2('a', 'B' collate "C") as f; + t | f +---+--- + t | f +(1 row) + +CREATE OR REPLACE FUNCTION + mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text COLLATE "POSIX" := x; + yy text := y; +begin + return xx < yy; +end +$$; +SELECT mylt2('a', 'B') as f; + f +--- + f +(1 row) + +SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations +ERROR: could not determine which collation to use for string comparison +HINT: Use the COLLATE clause to set the collation explicitly. 
+CONTEXT: PL/pgSQL function mylt2(text,text) line 6 at RETURN +SELECT mylt2('a', 'B' collate "POSIX") as f; + f +--- + f +(1 row) + +-- polymorphism +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1; + unnest +-------- + abc + ABC + äbc + bbc +(4 rows) + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1; + unnest +-------- + abc + ABC + bbc + äbc +(4 rows) + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1; + unnest +-------- + ABC + abc + bbc + äbc +(4 rows) + +CREATE FUNCTION dup (anyelement) RETURNS anyelement + AS 'select $1' LANGUAGE sql; +SELECT a, dup(b) FROM collate_test1 ORDER BY 2; + a | dup +---+----- + 1 | abc + 4 | ABC + 2 | äbc + 3 | bbc +(4 rows) + +SELECT a, dup(b) FROM collate_test2 ORDER BY 2; + a | dup +---+----- + 1 | abc + 4 | ABC + 3 | bbc + 2 | äbc +(4 rows) + +SELECT a, dup(b) FROM collate_test3 ORDER BY 2; + a | dup +---+----- + 4 | ABC + 1 | abc + 3 | bbc + 2 | äbc +(4 rows) + +-- indexes +CREATE INDEX collate_test1_idx1 ON collate_test1 (b); +CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C"); +CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically +CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX")); +CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail +ERROR: collations are not supported by type integer +CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail +ERROR: collations are not supported by type integer +LINE 1: ...ATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C... 
+ ^ +SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1; + relname | pg_get_indexdef +--------------------+----------------------------------------------------------------------------------------------------- + collate_test1_idx1 | CREATE INDEX collate_test1_idx1 ON collate_test1 USING btree (b) + collate_test1_idx2 | CREATE INDEX collate_test1_idx2 ON collate_test1 USING btree (b COLLATE "C") + collate_test1_idx3 | CREATE INDEX collate_test1_idx3 ON collate_test1 USING btree (b COLLATE "C") + collate_test1_idx4 | CREATE INDEX collate_test1_idx4 ON collate_test1 USING btree (((b || 'foo'::text)) COLLATE "POSIX") +(4 rows) + +-- schema manipulation commands +CREATE ROLE regress_test_role; +CREATE SCHEMA test_schema; +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name +ERROR: collation "test0" already exists +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype +ERROR: parameter "lc_ctype" must be specified +CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx; +CREATE COLLATION test4 FROM nonsense; +ERROR: collation "nonsense" for encoding "UTF8" does not exist +CREATE COLLATION test5 FROM test0; +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; + collname +---------- + test0 + test1 + test5 +(3 rows) + +ALTER COLLATION test1 RENAME TO test11; +ALTER COLLATION test0 RENAME TO test11; -- fail +ERROR: collation "test11" already exists in schema "collate_tests" 
+ALTER COLLATION test1 RENAME TO test22; -- fail +ERROR: collation "test1" for encoding "UTF8" does not exist +ALTER COLLATION test11 OWNER TO regress_test_role; +ALTER COLLATION test11 OWNER TO nonsense; +ERROR: role "nonsense" does not exist +ALTER COLLATION test11 SET SCHEMA test_schema; +COMMENT ON COLLATION test0 IS 'US English'; +SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation') + FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid) + WHERE collname LIKE 'test%' + ORDER BY 1; + collname | nspname | obj_description +----------+---------------+----------------- + test0 | collate_tests | US English + test11 | test_schema | + test5 | collate_tests | +(3 rows) + +DROP COLLATION test0, test_schema.test11, test5; +DROP COLLATION test0; -- fail +ERROR: collation "test0" for encoding "UTF8" does not exist +DROP COLLATION IF EXISTS test0; +NOTICE: collation "test0" does not exist, skipping +SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; + collname +---------- +(0 rows) + +DROP SCHEMA test_schema; +DROP ROLE regress_test_role; +-- ALTER +ALTER COLLATION "en-x-icu" REFRESH VERSION; +NOTICE: version has not changed +-- dependencies +CREATE COLLATION test0 FROM "C"; +CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); +CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; +CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0); +CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo; +CREATE TABLE collate_dep_test4t (a int, b text); +CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); +DROP COLLATION test0 RESTRICT; -- fail +ERROR: cannot drop collation test0 because other objects depend on it +DETAIL: table collate_dep_test1 column b depends on collation test0 +type collate_dep_dom1 depends on collation test0 +composite type collate_dep_test2 column y depends on collation test0 +view collate_dep_test3 depends on collation test0 +index collate_dep_test4i 
depends on collation test0 +HINT: Use DROP ... CASCADE to drop the dependent objects too. +DROP COLLATION test0 CASCADE; +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table collate_dep_test1 column b +drop cascades to type collate_dep_dom1 +drop cascades to composite type collate_dep_test2 column y +drop cascades to view collate_dep_test3 +drop cascades to index collate_dep_test4i +\d collate_dep_test1 + Table "collate_tests.collate_dep_test1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + +\d collate_dep_test2 + Composite type "collate_tests.collate_dep_test2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + x | integer | | | + +DROP TABLE collate_dep_test1, collate_dep_test4t; +DROP TYPE collate_dep_test2; +-- test range types and collations +create type textrange_c as range(subtype=text, collation="C"); +create type textrange_en_us as range(subtype=text, collation="en-x-icu"); +select textrange_c('A','Z') @> 'b'::text; + ?column? +---------- + f +(1 row) + +select textrange_en_us('A','Z') @> 'b'::text; + ?column? 
+---------- + t +(1 row) + +drop type textrange_c; +drop type textrange_en_us; +-- cleanup +DROP SCHEMA collate_tests CASCADE; +NOTICE: drop cascades to 18 other objects +DETAIL: drop cascades to table collate_test1 +drop cascades to table collate_test_like +drop cascades to table collate_test2 +drop cascades to table collate_test3 +drop cascades to type testdomain_sv +drop cascades to table collate_test4 +drop cascades to table collate_test5 +drop cascades to table collate_test10 +drop cascades to table collate_test6 +drop cascades to view collview1 +drop cascades to view collview2 +drop cascades to view collview3 +drop cascades to type testdomain +drop cascades to function mylt(text,text) +drop cascades to function mylt_noninline(text,text) +drop cascades to function mylt_plpgsql(text,text) +drop cascades to function mylt2(text,text) +drop cascades to function dup(anyelement) +RESET search_path; +-- leave a collation for pg_upgrade test +CREATE COLLATION coll_icu_upgrade FROM "und-x-icu"; diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index 293e78641ec..26275c3fb3e 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -4,12 +4,14 @@ * because other encodings don't support all the characters used. 
*/ SET client_encoding TO UTF8; +CREATE SCHEMA collate_tests; +SET search_path = collate_tests; CREATE TABLE collate_test1 ( a int, b text COLLATE "en_US" NOT NULL ); \d collate_test1 - Table "public.collate_test1" + Table "collate_tests.collate_test1" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- a | integer | | | @@ -40,7 +42,7 @@ CREATE TABLE collate_test_like ( LIKE collate_test1 ); \d collate_test_like - Table "public.collate_test_like" + Table "collate_tests.collate_test_like" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- a | integer | | | @@ -364,6 +366,38 @@ SELECT * FROM collate_test1 WHERE b ~* 'bc'; 4 | ABC (4 rows) +CREATE TABLE collate_test6 ( + a int, + b text COLLATE "en_US" +); +INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), + (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), + (9, 'äbç'), (10, 'ÄBÇ'); +SELECT b, + b ~ '^[[:alpha:]]+$' AS is_alpha, + b ~ '^[[:upper:]]+$' AS is_upper, + b ~ '^[[:lower:]]+$' AS is_lower, + b ~ '^[[:digit:]]+$' AS is_digit, + b ~ '^[[:alnum:]]+$' AS is_alnum, + b ~ '^[[:graph:]]+$' AS is_graph, + b ~ '^[[:print:]]+$' AS is_print, + b ~ '^[[:punct:]]+$' AS is_punct, + b ~ '^[[:space:]]+$' AS is_space +FROM collate_test6; + b | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space +-----+----------+----------+----------+----------+----------+----------+----------+----------+---------- + abc | t | f | t | f | t | t | t | f | f + ABC | t | t | f | f | t | t | t | f | f + 123 | f | f | f | t | t | t | t | f | f + ab1 | f | f | f | f | t | t | t | f | f + a1! 
| f | f | f | f | f | t | t | f | f + a c | f | f | f | f | f | f | t | f | f + !.; | f | f | f | f | f | t | t | t | f + | f | f | f | f | f | f | t | f | t + äbç | t | f | t | f | t | t | t | f | f + ÄBÇ | t | t | f | f | t | t | t | f | f +(10 rows) + SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true"; true ------ @@ -980,6 +1014,8 @@ ERROR: parameter "lc_ctype" must be specified CREATE COLLATION testx (locale = 'nonsense'); -- fail ERROR: could not create locale "nonsense": No such file or directory DETAIL: The operating system could not find any locale data for the locale name "nonsense". +CREATE COLLATION testy (locale = 'en_US.utf8', version = 'foo'); -- fail, no versions for libc +ERROR: collation "testy" has no actual version, but a version was specified CREATE COLLATION test4 FROM nonsense; ERROR: collation "nonsense" for encoding "UTF8" does not exist CREATE COLLATION test5 FROM test0; @@ -993,7 +1029,7 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; ALTER COLLATION test1 RENAME TO test11; ALTER COLLATION test0 RENAME TO test11; -- fail -ERROR: collation "test11" for encoding "UTF8" already exists in schema "public" +ERROR: collation "test11" for encoding "UTF8" already exists in schema "collate_tests" ALTER COLLATION test1 RENAME TO test22; -- fail ERROR: collation "test1" for encoding "UTF8" does not exist ALTER COLLATION test11 OWNER TO regress_test_role; @@ -1005,11 +1041,11 @@ SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation') FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid) WHERE collname LIKE 'test%' ORDER BY 1; - collname | nspname | obj_description -----------+-------------+----------------- - test0 | public | US English - test11 | test_schema | - test5 | public | + collname | nspname | obj_description +----------+---------------+----------------- + test0 | collate_tests | US English + test11 | test_schema | + test5 | collate_tests | (3 rows) DROP COLLATION test0, 
test_schema.test11, test5; @@ -1024,6 +1060,9 @@ SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; DROP SCHEMA test_schema; DROP ROLE regress_test_role; +-- ALTER +ALTER COLLATION "en_US" REFRESH VERSION; +NOTICE: version has not changed -- dependencies CREATE COLLATION test0 FROM "C"; CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); @@ -1048,13 +1087,13 @@ drop cascades to composite type collate_dep_test2 column y drop cascades to view collate_dep_test3 drop cascades to index collate_dep_test4i \d collate_dep_test1 - Table "public.collate_dep_test1" + Table "collate_tests.collate_dep_test1" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- a | integer | | | \d collate_dep_test2 - Composite type "public.collate_dep_test2" + Composite type "collate_tests.collate_dep_test2" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- x | integer | | | @@ -1078,3 +1117,24 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- cleanup +DROP SCHEMA collate_tests CASCADE; +NOTICE: drop cascades to 18 other objects +DETAIL: drop cascades to table collate_test1 +drop cascades to table collate_test_like +drop cascades to table collate_test2 +drop cascades to table collate_test3 +drop cascades to type testdomain_sv +drop cascades to table collate_test4 +drop cascades to table collate_test5 +drop cascades to table collate_test10 +drop cascades to table collate_test6 +drop cascades to view collview1 +drop cascades to view collview2 +drop cascades to view collview3 +drop cascades to type testdomain +drop cascades to function mylt(text,text) +drop cascades to function mylt_noninline(text,text) +drop cascades to function mylt_plpgsql(text,text) +drop cascades to function mylt2(text,text) +drop cascades to function dup(anyelement) diff --git a/src/test/regress/sql/collate.icu.sql b/src/test/regress/sql/collate.icu.sql new 
file mode 100644 index 00000000000..ef39445b301 --- /dev/null +++ b/src/test/regress/sql/collate.icu.sql @@ -0,0 +1,433 @@ +/* + * This test is for ICU collations. + */ + +SET client_encoding TO UTF8; + +CREATE SCHEMA collate_tests; +SET search_path = collate_tests; + + +CREATE TABLE collate_test1 ( + a int, + b text COLLATE "en-x-icu" NOT NULL +); + +\d collate_test1 + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "ja_JP.eucjp-x-icu" +); + +CREATE TABLE collate_test_fail ( + a int, + b text COLLATE "foo-x-icu" +); + +CREATE TABLE collate_test_fail ( + a int COLLATE "en-x-icu", + b text +); + +CREATE TABLE collate_test_like ( + LIKE collate_test1 +); + +\d collate_test_like + +CREATE TABLE collate_test2 ( + a int, + b text COLLATE "sv-x-icu" +); + +CREATE TABLE collate_test3 ( + a int, + b text COLLATE "C" +); + +INSERT INTO collate_test1 VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC'); +INSERT INTO collate_test2 SELECT * FROM collate_test1; +INSERT INTO collate_test3 SELECT * FROM collate_test1; + +SELECT * FROM collate_test1 WHERE b >= 'bbc'; +SELECT * FROM collate_test2 WHERE b >= 'bbc'; +SELECT * FROM collate_test3 WHERE b >= 'bbc'; +SELECT * FROM collate_test3 WHERE b >= 'BBC'; + +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +SELECT * FROM collate_test1 WHERE b >= 'bbc' COLLATE "C"; +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "C"; +SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc' COLLATE "en-x-icu"; + + +CREATE DOMAIN testdomain_sv AS text COLLATE "sv-x-icu"; +CREATE DOMAIN testdomain_i AS int COLLATE "sv-x-icu"; -- fails +CREATE TABLE collate_test4 ( + a int, + b testdomain_sv +); +INSERT INTO collate_test4 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test4 ORDER BY b; + +CREATE TABLE collate_test5 ( + a int, + b testdomain_sv COLLATE "en-x-icu" +); +INSERT INTO collate_test5 SELECT * FROM collate_test1; +SELECT a, b FROM collate_test5 ORDER BY b; + + +SELECT a, b FROM collate_test1 
ORDER BY b; +SELECT a, b FROM collate_test2 ORDER BY b; +SELECT a, b FROM collate_test3 ORDER BY b; + +SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; + +-- star expansion +SELECT * FROM collate_test1 ORDER BY b; +SELECT * FROM collate_test2 ORDER BY b; +SELECT * FROM collate_test3 ORDER BY b; + +-- constant expression folding +SELECT 'bbc' COLLATE "en-x-icu" > 'äbc' COLLATE "en-x-icu" AS "true"; +SELECT 'bbc' COLLATE "sv-x-icu" > 'äbc' COLLATE "sv-x-icu" AS "false"; + +-- upper/lower + +CREATE TABLE collate_test10 ( + a int, + x text COLLATE "en-x-icu", + y text COLLATE "tr-x-icu" +); + +INSERT INTO collate_test10 VALUES (1, 'hij', 'hij'), (2, 'HIJ', 'HIJ'); + +SELECT a, lower(x), lower(y), upper(x), upper(y), initcap(x), initcap(y) FROM collate_test10; +SELECT a, lower(x COLLATE "C"), lower(y COLLATE "C") FROM collate_test10; + +SELECT a, x, y FROM collate_test10 ORDER BY lower(y), a; + +-- LIKE/ILIKE + +SELECT * FROM collate_test1 WHERE b LIKE 'abc'; +SELECT * FROM collate_test1 WHERE b LIKE 'abc%'; +SELECT * FROM collate_test1 WHERE b LIKE '%bc%'; +SELECT * FROM collate_test1 WHERE b ILIKE 'abc'; +SELECT * FROM collate_test1 WHERE b ILIKE 'abc%'; +SELECT * FROM collate_test1 WHERE b ILIKE '%bc%'; + +SELECT 'Türkiye' COLLATE "en-x-icu" ILIKE '%KI%' AS "true"; +SELECT 'Türkiye' COLLATE "tr-x-icu" ILIKE '%KI%' AS "false"; + +SELECT 'bıt' ILIKE 'BIT' COLLATE "en-x-icu" AS "false"; +SELECT 'bıt' ILIKE 'BIT' COLLATE "tr-x-icu" AS "true"; + +-- The following actually exercises the selectivity estimation for ILIKE. 
+SELECT relname FROM pg_class WHERE relname ILIKE 'abc%'; + +-- regular expressions + +SELECT * FROM collate_test1 WHERE b ~ '^abc$'; +SELECT * FROM collate_test1 WHERE b ~ '^abc'; +SELECT * FROM collate_test1 WHERE b ~ 'bc'; +SELECT * FROM collate_test1 WHERE b ~* '^abc$'; +SELECT * FROM collate_test1 WHERE b ~* '^abc'; +SELECT * FROM collate_test1 WHERE b ~* 'bc'; + +CREATE TABLE collate_test6 ( + a int, + b text COLLATE "en-x-icu" +); +INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), + (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), + (9, 'äbç'), (10, 'ÄBÇ'); +SELECT b, + b ~ '^[[:alpha:]]+$' AS is_alpha, + b ~ '^[[:upper:]]+$' AS is_upper, + b ~ '^[[:lower:]]+$' AS is_lower, + b ~ '^[[:digit:]]+$' AS is_digit, + b ~ '^[[:alnum:]]+$' AS is_alnum, + b ~ '^[[:graph:]]+$' AS is_graph, + b ~ '^[[:print:]]+$' AS is_print, + b ~ '^[[:punct:]]+$' AS is_punct, + b ~ '^[[:space:]]+$' AS is_space +FROM collate_test6; + +SELECT 'Türkiye' COLLATE "en-x-icu" ~* 'KI' AS "true"; +SELECT 'Türkiye' COLLATE "tr-x-icu" ~* 'KI' AS "true"; -- true with ICU + +SELECT 'bıt' ~* 'BIT' COLLATE "en-x-icu" AS "false"; +SELECT 'bıt' ~* 'BIT' COLLATE "tr-x-icu" AS "false"; -- false with ICU + +-- The following actually exercises the selectivity estimation for ~*. 
+SELECT relname FROM pg_class WHERE relname ~* '^abc'; + + +/* not run by default because it requires tr_TR system locale +-- to_char + +SET lc_time TO 'tr_TR'; +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); +SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr-x-icu"); +*/ + + +-- backwards parsing + +CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc'; +CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C"; +CREATE VIEW collview3 AS SELECT a, lower((x || x) COLLATE "C") FROM collate_test10; + +SELECT table_name, view_definition FROM information_schema.views + WHERE table_name LIKE 'collview%' ORDER BY 1; + + +-- collation propagation in various expression types + +SELECT a, coalesce(b, 'foo') FROM collate_test1 ORDER BY 2; +SELECT a, coalesce(b, 'foo') FROM collate_test2 ORDER BY 2; +SELECT a, coalesce(b, 'foo') FROM collate_test3 ORDER BY 2; +SELECT a, lower(coalesce(x, 'foo')), lower(coalesce(y, 'foo')) FROM collate_test10; + +SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3; +SELECT a, b, greatest(b, 'CCC') FROM collate_test2 ORDER BY 3; +SELECT a, b, greatest(b, 'CCC') FROM collate_test3 ORDER BY 3; +SELECT a, x, y, lower(greatest(x, 'foo')), lower(greatest(y, 'foo')) FROM collate_test10; + +SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2; +SELECT a, nullif(b, 'abc') FROM collate_test2 ORDER BY 2; +SELECT a, nullif(b, 'abc') FROM collate_test3 ORDER BY 2; +SELECT a, lower(nullif(x, 'foo')), lower(nullif(y, 'foo')) FROM collate_test10; + +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test1 ORDER BY 2; +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test2 ORDER BY 2; +SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END FROM collate_test3 ORDER BY 2; + +CREATE DOMAIN testdomain AS text; +SELECT a, b::testdomain FROM collate_test1 ORDER BY 2; +SELECT a, b::testdomain FROM collate_test2 ORDER BY 2; +SELECT a, b::testdomain FROM 
collate_test3 ORDER BY 2; +SELECT a, b::testdomain_sv FROM collate_test3 ORDER BY 2; +SELECT a, lower(x::testdomain), lower(y::testdomain) FROM collate_test10; + +SELECT min(b), max(b) FROM collate_test1; +SELECT min(b), max(b) FROM collate_test2; +SELECT min(b), max(b) FROM collate_test3; + +SELECT array_agg(b ORDER BY b) FROM collate_test1; +SELECT array_agg(b ORDER BY b) FROM collate_test2; +SELECT array_agg(b ORDER BY b) FROM collate_test3; + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2; +SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2; +SELECT a, b FROM collate_test3 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test3 WHERE a > 1 ORDER BY 2; +SELECT a, b FROM collate_test3 EXCEPT SELECT a, b FROM collate_test3 WHERE a < 2 ORDER BY 2; + +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- ok +SELECT a, b FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b COLLATE "C" FROM collate_test1 UNION SELECT a, b FROM collate_test3 ORDER BY 2; -- ok +SELECT a, b FROM collate_test1 INTERSECT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail +SELECT a, b FROM collate_test1 EXCEPT SELECT a, b FROM collate_test3 ORDER BY 2; -- fail + +CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test3; -- fail + +-- ideally this would be a parse-time error, but for now it must be run-time: +select x < y from collate_test10; -- fail +select x || y from collate_test10; -- ok, because || is not collation aware +select x, y from collate_test10 order by x || y; -- not so ok + +-- collation mismatch between recursive and non-recursive term +WITH RECURSIVE foo(x) AS + (SELECT x FROM (VALUES('a' COLLATE "en-x-icu"),('b')) t(x) + UNION ALL + SELECT (x || 'c') COLLATE "de-x-icu" FROM foo WHERE length(x) < 10) +SELECT * FROM foo; + + +-- 
casting + +SELECT CAST('42' AS text COLLATE "C"); + +SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2; +SELECT a, CAST(b AS varchar) FROM collate_test2 ORDER BY 2; +SELECT a, CAST(b AS varchar) FROM collate_test3 ORDER BY 2; + + +-- propagation of collation in SQL functions (inlined and non-inlined cases) +-- and plpgsql functions too + +CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 $$; + +CREATE FUNCTION mylt_noninline (text, text) RETURNS boolean LANGUAGE sql + AS $$ select $1 < $2 limit 1 $$; + +CREATE FUNCTION mylt_plpgsql (text, text) RETURNS boolean LANGUAGE plpgsql + AS $$ begin return $1 < $2; end $$; + +SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, + mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + +SELECT a.b AS a, b.b AS b, a.b < b.b COLLATE "C" AS lt, + mylt(a.b, b.b COLLATE "C"), mylt_noninline(a.b, b.b COLLATE "C"), + mylt_plpgsql(a.b, b.b COLLATE "C") +FROM collate_test1 a, collate_test1 b +ORDER BY a.b, b.b; + + +-- collation override in plpgsql + +CREATE FUNCTION mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text := x; + yy text := y; +begin + return xx < yy; +end +$$; + +SELECT mylt2('a', 'B' collate "en-x-icu") as t, mylt2('a', 'B' collate "C") as f; + +CREATE OR REPLACE FUNCTION + mylt2 (x text, y text) RETURNS boolean LANGUAGE plpgsql AS $$ +declare + xx text COLLATE "POSIX" := x; + yy text := y; +begin + return xx < yy; +end +$$; + +SELECT mylt2('a', 'B') as f; +SELECT mylt2('a', 'B' collate "C") as fail; -- conflicting collations +SELECT mylt2('a', 'B' collate "POSIX") as f; + + +-- polymorphism + +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test1)) ORDER BY 1; +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test2)) ORDER BY 1; +SELECT * FROM unnest((SELECT array_agg(b ORDER BY b) FROM collate_test3)) ORDER BY 1; + +CREATE FUNCTION dup (anyelement) 
RETURNS anyelement + AS 'select $1' LANGUAGE sql; + +SELECT a, dup(b) FROM collate_test1 ORDER BY 2; +SELECT a, dup(b) FROM collate_test2 ORDER BY 2; +SELECT a, dup(b) FROM collate_test3 ORDER BY 2; + + +-- indexes + +CREATE INDEX collate_test1_idx1 ON collate_test1 (b); +CREATE INDEX collate_test1_idx2 ON collate_test1 (b COLLATE "C"); +CREATE INDEX collate_test1_idx3 ON collate_test1 ((b COLLATE "C")); -- this is different grammatically +CREATE INDEX collate_test1_idx4 ON collate_test1 (((b||'foo') COLLATE "POSIX")); + +CREATE INDEX collate_test1_idx5 ON collate_test1 (a COLLATE "C"); -- fail +CREATE INDEX collate_test1_idx6 ON collate_test1 ((a COLLATE "C")); -- fail + +SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_test%_idx%' ORDER BY 1; + + +-- schema manipulation commands + +CREATE ROLE regress_test_role; +CREATE SCHEMA test_schema; + +-- We need to do this this way to cope with varying names for encodings: +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; +END +$$; +CREATE COLLATION test0 FROM "C"; -- fail, duplicate name +do $$ +BEGIN + EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' || + quote_literal(current_setting('lc_collate')) || + ', lc_ctype = ' || + quote_literal(current_setting('lc_ctype')) || ');'; +END +$$; +CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype +CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx; + +CREATE COLLATION test4 FROM nonsense; +CREATE COLLATION test5 FROM test0; + +SELECT collname FROM pg_collation WHERE collname LIKE 'test%' ORDER BY 1; + +ALTER COLLATION test1 RENAME TO test11; +ALTER COLLATION test0 RENAME TO test11; -- fail +ALTER COLLATION test1 RENAME TO test22; -- fail + +ALTER COLLATION test11 OWNER TO regress_test_role; +ALTER COLLATION test11 OWNER TO nonsense; +ALTER COLLATION 
test11 SET SCHEMA test_schema; + +COMMENT ON COLLATION test0 IS 'US English'; + +SELECT collname, nspname, obj_description(pg_collation.oid, 'pg_collation') + FROM pg_collation JOIN pg_namespace ON (collnamespace = pg_namespace.oid) + WHERE collname LIKE 'test%' + ORDER BY 1; + +DROP COLLATION test0, test_schema.test11, test5; +DROP COLLATION test0; -- fail +DROP COLLATION IF EXISTS test0; + +SELECT collname FROM pg_collation WHERE collname LIKE 'test%'; + +DROP SCHEMA test_schema; +DROP ROLE regress_test_role; + + +-- ALTER + +ALTER COLLATION "en-x-icu" REFRESH VERSION; + + +-- dependencies + +CREATE COLLATION test0 FROM "C"; + +CREATE TABLE collate_dep_test1 (a int, b text COLLATE test0); +CREATE DOMAIN collate_dep_dom1 AS text COLLATE test0; +CREATE TYPE collate_dep_test2 AS (x int, y text COLLATE test0); +CREATE VIEW collate_dep_test3 AS SELECT text 'foo' COLLATE test0 AS foo; +CREATE TABLE collate_dep_test4t (a int, b text); +CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); + +DROP COLLATION test0 RESTRICT; -- fail +DROP COLLATION test0 CASCADE; + +\d collate_dep_test1 +\d collate_dep_test2 + +DROP TABLE collate_dep_test1, collate_dep_test4t; +DROP TYPE collate_dep_test2; + +-- test range types and collations + +create type textrange_c as range(subtype=text, collation="C"); +create type textrange_en_us as range(subtype=text, collation="en-x-icu"); + +select textrange_c('A','Z') @> 'b'::text; +select textrange_en_us('A','Z') @> 'b'::text; + +drop type textrange_c; +drop type textrange_en_us; + + +-- cleanup +DROP SCHEMA collate_tests CASCADE; +RESET search_path; + +-- leave a collation for pg_upgrade test +CREATE COLLATION coll_icu_upgrade FROM "und-x-icu"; diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index c349cbde2b9..b51162e3a1f 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -6,6 +6,9 @@ SET client_encoding TO UTF8; 
+CREATE SCHEMA collate_tests; +SET search_path = collate_tests; + CREATE TABLE collate_test1 ( a int, @@ -134,6 +137,25 @@ SELECT * FROM collate_test1 WHERE b ~* '^abc$'; SELECT * FROM collate_test1 WHERE b ~* '^abc'; SELECT * FROM collate_test1 WHERE b ~* 'bc'; +CREATE TABLE collate_test6 ( + a int, + b text COLLATE "en_US" +); +INSERT INTO collate_test6 VALUES (1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), + (5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), + (9, 'äbç'), (10, 'ÄBÇ'); +SELECT b, + b ~ '^[[:alpha:]]+$' AS is_alpha, + b ~ '^[[:upper:]]+$' AS is_upper, + b ~ '^[[:lower:]]+$' AS is_lower, + b ~ '^[[:digit:]]+$' AS is_digit, + b ~ '^[[:alnum:]]+$' AS is_alnum, + b ~ '^[[:graph:]]+$' AS is_graph, + b ~ '^[[:print:]]+$' AS is_print, + b ~ '^[[:punct:]]+$' AS is_punct, + b ~ '^[[:space:]]+$' AS is_space +FROM collate_test6; + SELECT 'Türkiye' COLLATE "en_US" ~* 'KI' AS "true"; SELECT 'Türkiye' COLLATE "tr_TR" ~* 'KI' AS "false"; @@ -337,6 +359,7 @@ END $$; CREATE COLLATION test3 (lc_collate = 'en_US.utf8'); -- fail, need lc_ctype CREATE COLLATION testx (locale = 'nonsense'); -- fail +CREATE COLLATION testy (locale = 'en_US.utf8', version = 'foo'); -- fail, no versions for libc CREATE COLLATION test4 FROM nonsense; CREATE COLLATION test5 FROM test0; @@ -368,6 +391,11 @@ DROP SCHEMA test_schema; DROP ROLE regress_test_role; +-- ALTER + +ALTER COLLATION "en_US" REFRESH VERSION; + + -- dependencies CREATE COLLATION test0 FROM "C"; @@ -398,3 +426,7 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; + + +-- cleanup +DROP SCHEMA collate_tests CASCADE; |