aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Eisentraut <peter@eisentraut.org>2019-03-17 08:16:33 +0100
committerPeter Eisentraut <peter@eisentraut.org>2019-03-17 08:47:15 +0100
commitb8f9a2a69a279d118e366a0d3d45caa84a7620b1 (patch)
tree1567d34de216333da585621e002cdb4d7b771c14
parent042162d6281a7daf1291931ee7b0a5641d3a73d7 (diff)
downloadpostgresql-b8f9a2a69a279d118e366a0d3d45caa84a7620b1.tar.gz
postgresql-b8f9a2a69a279d118e366a0d3d45caa84a7620b1.zip
Add support for collation attributes on older ICU versions
Starting in ICU 54, collation customization attributes can be specified in the locale string, for example "@colStrength=primary;colCaseLevel=yes". Add support for this for older ICU versions as well, by adding some minimal parsing of the attributes in the locale string and calling ucol_setAttribute() on them. This is essentially what newer ICU versions do internally in ucol_open(). This way we can offer this functionality in a consistent way in all ICU versions supported by PostgreSQL. Also add some tests for ICU collation customization. Reported-by: Daniel Verite <daniel@manitou-mail.org> Discussion: https://www.postgresql.org/message-id/0270ebd4-f67c-8774-1a5a-91adfb9bb41f@2ndquadrant.com
-rw-r--r--src/backend/utils/adt/pg_locale.c104
-rw-r--r--src/test/regress/expected/collate.icu.utf8.out39
-rw-r--r--src/test/regress/sql/collate.icu.utf8.sql21
3 files changed, 164 insertions, 0 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 50b8b31645d..ec14bad4e34 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -58,6 +58,7 @@
#include "catalog/pg_control.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
+#include "utils/formatting.h"
#include "utils/hsearch.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL;
static char *IsoLocaleName(const char *); /* MSVC specific */
#endif
+#ifdef USE_ICU
+static void icu_set_collation_attributes(UCollator *collator, const char *loc);
+#endif
/*
* pg_perm_setlocale
@@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid)
(errmsg("could not open collator for locale \"%s\": %s",
collcollate, u_errorName(status))));
+ if (U_ICU_VERSION_MAJOR_NUM < 54)
+ icu_set_collation_attributes(collator, collcollate);
+
/* We will leak this string if we get an error below :-( */
result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
collcollate);
@@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
return len_result;
}
+/*
+ * Parse collation attributes and apply them to the open collator.
+ *
+ * "loc" is an ICU locale string such as
+ * "und@colStrength=primary;colCaseLevel=yes".  Everything after the first
+ * '@' is treated as a ';'-separated list of name=value items, and each
+ * recognized item is applied to "collator" with ucol_setAttribute().  The
+ * string is lower-cased before parsing, so names and values are compared
+ * against lower-case spellings.
+ *
+ * Items without '=' are skipped and unrecognized attribute names are ignored.
+ * An unrecognized value, or a failure reported by ucol_setAttribute(), raises
+ * an ERROR worded like the one reported for a failed ucol_open().
+ *
+ * Starting with ICU version 54, the attributes are processed automatically by
+ * ucol_open(), so this is only necessary for emulating this behavior on older
+ * versions.
+ */
+pg_attribute_unused()
+static void
+icu_set_collation_attributes(UCollator *collator, const char *loc)
+{
+	/*
+	 * Work on a private lower-cased copy, since parsing writes NUL bytes into
+	 * it.  Keep the pointer to the start of the copy so that it can be freed;
+	 * "str" is advanced past the '@' and cannot be used for that.
+	 */
+	char	   *lower_str = asc_tolower(loc, strlen(loc));
+	char	   *str;
+	char	   *next;
+
+	str = strchr(lower_str, '@');
+	if (!str)
+	{
+		/* no attribute section at all */
+		pfree(lower_str);
+		return;
+	}
+	str++;
+
+	/*
+	 * Split on ';' by hand instead of using strtok(), which keeps hidden
+	 * static state.  Empty items contain no '=' and are skipped below, so the
+	 * same items are processed as with strtok().
+	 */
+	for (char *token = str; token != NULL; token = next)
+	{
+		char	   *sep = strchr(token, ';');
+		char	   *e;
+
+		if (sep)
+		{
+			*sep = '\0';
+			next = sep + 1;
+		}
+		else
+			next = NULL;
+
+		e = strchr(token, '=');
+		if (e)
+		{
+			char	   *name;
+			char	   *value;
+			UColAttribute uattr = -1;
+			UColAttributeValue uvalue = -1;
+			UErrorCode	status;
+
+			status = U_ZERO_ERROR;
+
+			/* cut the item into "name" and "value" at the '=' */
+			*e = '\0';
+			name = token;
+			value = e + 1;
+
+			/*
+			 * See attribute name and value lists in ICU i18n/coll.cpp
+			 */
+			if (strcmp(name, "colstrength") == 0)
+				uattr = UCOL_STRENGTH;
+			else if (strcmp(name, "colbackwards") == 0)
+				uattr = UCOL_FRENCH_COLLATION;
+			else if (strcmp(name, "colcaselevel") == 0)
+				uattr = UCOL_CASE_LEVEL;
+			else if (strcmp(name, "colcasefirst") == 0)
+				uattr = UCOL_CASE_FIRST;
+			else if (strcmp(name, "colalternate") == 0)
+				uattr = UCOL_ALTERNATE_HANDLING;
+			else if (strcmp(name, "colnormalization") == 0)
+				uattr = UCOL_NORMALIZATION_MODE;
+			else if (strcmp(name, "colnumeric") == 0)
+				uattr = UCOL_NUMERIC_COLLATION;
+			/* ignore if unknown */
+
+			if (strcmp(value, "primary") == 0)
+				uvalue = UCOL_PRIMARY;
+			else if (strcmp(value, "secondary") == 0)
+				uvalue = UCOL_SECONDARY;
+			else if (strcmp(value, "tertiary") == 0)
+				uvalue = UCOL_TERTIARY;
+			else if (strcmp(value, "quaternary") == 0)
+				uvalue = UCOL_QUATERNARY;
+			else if (strcmp(value, "identical") == 0)
+				uvalue = UCOL_IDENTICAL;
+			else if (strcmp(value, "no") == 0)
+				uvalue = UCOL_OFF;
+			else if (strcmp(value, "yes") == 0)
+				uvalue = UCOL_ON;
+			else if (strcmp(value, "shifted") == 0)
+				uvalue = UCOL_SHIFTED;
+			else if (strcmp(value, "non-ignorable") == 0)
+				uvalue = UCOL_NON_IGNORABLE;
+			else if (strcmp(value, "lower") == 0)
+				uvalue = UCOL_LOWER_FIRST;
+			else if (strcmp(value, "upper") == 0)
+				uvalue = UCOL_UPPER_FIRST;
+			else
+				status = U_ILLEGAL_ARGUMENT_ERROR;
+
+			/* apply only when both the name and the value were recognized */
+			if (uattr != -1 && uvalue != -1)
+				ucol_setAttribute(collator, uattr, uvalue, &status);
+
+			/*
+			 * Pretend the error came from ucol_open(), for consistent error
+			 * message across ICU versions.  The lower-cased copy is not
+			 * freed explicitly on this path.
+			 */
+			if (U_FAILURE(status))
+				ereport(ERROR,
+						(errmsg("could not open collator for locale \"%s\": %s",
+								loc, u_errorName(status))));
+		}
+	}
+
+	pfree(lower_str);
+}
+
#endif /* USE_ICU */
/*
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index f95d1652885..4b94921cf88 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1100,6 +1100,45 @@ select textrange_en_us('A','Z') @> 'b'::text;
drop type textrange_c;
drop type textrange_en_us;
+-- test ICU collation customization
+CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
+CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
+SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
+SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
+ ?column? | ?column?
+----------+----------
+ t | t
+(1 row)
+
+CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
+ERROR: could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR
-- cleanup
SET client_min_messages TO warning;
DROP SCHEMA collate_tests CASCADE;
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 0aeba3e202b..73fb1232a7d 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -425,6 +425,27 @@ drop type textrange_c;
drop type textrange_en_us;
+-- test ICU collation customization
+
+CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
+
+CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
+
+CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
+CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
+SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
+
+CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
+SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
+
+CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
+
+CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
+
+
-- cleanup
SET client_min_messages TO warning;
DROP SCHEMA collate_tests CASCADE;