about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/formatting.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/formatting.c')
-rw-r--r-- src/backend/utils/adt/formatting.c 453
1 files changed, 244 insertions, 209 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index c16bfbca933..0566abd314d 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -82,6 +82,10 @@
#include <wctype.h>
#endif
+#ifdef USE_ICU
+#include <unicode/ustring.h>
+#endif
+
#include "catalog/pg_collation.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
@@ -1443,6 +1447,42 @@ str_numth(char *dest, char *num, int type)
* upper/lower/initcap functions
*****************************************************************************/
+#ifdef USE_ICU
+/*
+ * icu_convert_case
+ *
+ * Run an ICU case-mapping function over a UChar string, placing the output
+ * in a freshly palloc'd buffer.
+ *
+ * func         case-mapping routine taking (dest, destCapacity, src,
+ *              srcLength, locale, status) and returning a length
+ * mylocale     supplies the ICU locale name (mylocale->info.icu.locale)
+ * buff_dest    out parameter: *buff_dest is set to the palloc'd result buffer
+ * buff_source  input string
+ * len_source   length of the input, in UChars
+ *
+ * Returns the length value produced by func.  Raises ERROR if func reports
+ * a failure status.
+ *
+ * The first attempt uses a destination as long as the source.  If func
+ * reports U_BUFFER_OVERFLOW_ERROR, the buffer is released, reallocated using
+ * the length func returned, and the conversion is retried once.
+ */
+static int32_t
+icu_convert_case(int32_t (*func)(UChar *, int32_t, const UChar *, int32_t, const char *, UErrorCode *),
+                 pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source)
+{
+    UErrorCode  status;
+    int32_t     len_dest;
+
+    len_dest = len_source;      /* try first with same length */
+    *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+    status = U_ZERO_ERROR;
+    len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+    if (status == U_BUFFER_OVERFLOW_ERROR)
+    {
+        /*
+         * Try again with adjusted length.  Free and replace the buffer the
+         * caller's pointer refers to (*buff_dest), never buff_dest itself:
+         * buff_dest is the address of the caller's pointer variable, and
+         * the new allocation must be stored through it so that the caller
+         * sees the result.
+         */
+        pfree(*buff_dest);
+        *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+        status = U_ZERO_ERROR;
+        len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+    }
+    if (U_FAILURE(status))
+        ereport(ERROR,
+                (errmsg("case conversion failed: %s", u_errorName(status))));
+    return len_dest;
+}
+
+/*
+ * u_strToTitle_default_BI
+ *
+ * Adapter that gives u_strToTitle() the six-argument shape expected by
+ * icu_convert_case()'s func parameter.  All arguments are forwarded
+ * unchanged; the extra fifth argument of u_strToTitle() is always NULL
+ * (presumably selecting ICU's default break iterator, as the "_default_BI"
+ * suffix suggests -- confirm against the ICU documentation).
+ *
+ * Returns whatever u_strToTitle() returns.
+ */
+static int32_t
+u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+                        const UChar *src, int32_t srcLength,
+                        const char *locale,
+                        UErrorCode *pErrorCode)
+{
+    int32_t     titled_len;
+
+    titled_len = u_strToTitle(dest, destCapacity,
+                              src, srcLength,
+                              NULL,
+                              locale,
+                              pErrorCode);
+
+    return titled_len;
+}
+#endif
+
/*
* If the system provides the needed functions for wide-character manipulation
* (which are all standardized by C99), then we implement upper/lower/initcap
@@ -1479,12 +1519,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
result = asc_tolower(buff, nbytes);
}
#ifdef USE_WIDE_UPPER_LOWER
- else if (pg_database_encoding_max_length() > 1)
+ else
{
pg_locale_t mylocale = 0;
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
if (collid != DEFAULT_COLLATION_OID)
{
@@ -1502,77 +1539,79 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
}
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t len_uchar;
+ int32_t len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToLower, mylocale, &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ }
+ else
+#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
- if (mylocale)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
- else
+ if (mylocale)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+ else
#endif
- workspace[curr_char] = towlower(workspace[curr_char]);
- }
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ }
- /* Make result large enough; case change might change number of bytes */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
+ /* Make result large enough; case change might change number of bytes */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
+ }
#endif /* USE_WIDE_UPPER_LOWER */
- else
- {
-#ifdef HAVE_LOCALE_T
- pg_locale_t mylocale = 0;
-#endif
- char *p;
-
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
+ else
{
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for lower() function"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
-#ifdef HAVE_LOCALE_T
- mylocale = pg_newlocale_from_collation(collid);
-#endif
- }
+ char *p;
- result = pnstrdup(buff, nbytes);
+ result = pnstrdup(buff, nbytes);
- /*
- * Note: we assume that tolower_l() will not be so broken as to need
- * an isupper_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = result; *p; p++)
- {
+ /*
+ * Note: we assume that tolower_l() will not be so broken as to need
+ * an isupper_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
+ for (p = result; *p; p++)
+ {
#ifdef HAVE_LOCALE_T
- if (mylocale)
- *p = tolower_l((unsigned char) *p, mylocale);
- else
+ if (mylocale)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+ else
#endif
- *p = pg_tolower((unsigned char) *p);
+ *p = pg_tolower((unsigned char) *p);
+ }
+ }
}
}
@@ -1599,12 +1638,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
result = asc_toupper(buff, nbytes);
}
#ifdef USE_WIDE_UPPER_LOWER
- else if (pg_database_encoding_max_length() > 1)
+ else
{
pg_locale_t mylocale = 0;
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
if (collid != DEFAULT_COLLATION_OID)
{
@@ -1622,77 +1658,78 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
}
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t len_uchar, len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToUpper, mylocale, &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ }
+ else
+#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
-#ifdef HAVE_LOCALE_T
- if (mylocale)
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
- else
-#endif
- workspace[curr_char] = towupper(workspace[curr_char]);
- }
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- /* Make result large enough; case change might change number of bytes */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
-#endif /* USE_WIDE_UPPER_LOWER */
- else
- {
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
- pg_locale_t mylocale = 0;
+ if (mylocale)
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ else
#endif
- char *p;
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ }
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for upper() function"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
+ /* Make result large enough; case change might change number of bytes */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
+
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
}
-#ifdef HAVE_LOCALE_T
- mylocale = pg_newlocale_from_collation(collid);
-#endif
- }
+#endif /* USE_WIDE_UPPER_LOWER */
+ else
+ {
+ char *p;
- result = pnstrdup(buff, nbytes);
+ result = pnstrdup(buff, nbytes);
- /*
- * Note: we assume that toupper_l() will not be so broken as to need
- * an islower_l() guard test. When using the default collation, we
- * apply the traditional Postgres behavior that forces ASCII-style
- * treatment of I/i, but in non-default collations you get exactly
- * what the collation says.
- */
- for (p = result; *p; p++)
- {
+ /*
+ * Note: we assume that toupper_l() will not be so broken as to need
+ * an islower_l() guard test. When using the default collation, we
+ * apply the traditional Postgres behavior that forces ASCII-style
+ * treatment of I/i, but in non-default collations you get exactly
+ * what the collation says.
+ */
+ for (p = result; *p; p++)
+ {
#ifdef HAVE_LOCALE_T
- if (mylocale)
- *p = toupper_l((unsigned char) *p, mylocale);
- else
+ if (mylocale)
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ else
#endif
- *p = pg_toupper((unsigned char) *p);
+ *p = pg_toupper((unsigned char) *p);
+ }
+ }
}
}
@@ -1720,12 +1757,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
result = asc_initcap(buff, nbytes);
}
#ifdef USE_WIDE_UPPER_LOWER
- else if (pg_database_encoding_max_length() > 1)
+ else
{
pg_locale_t mylocale = 0;
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
if (collid != DEFAULT_COLLATION_OID)
{
@@ -1743,100 +1777,101 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
}
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
{
-#ifdef HAVE_LOCALE_T
- if (mylocale)
- {
- if (wasalnum)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale);
- else
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale);
- wasalnum = iswalnum_l(workspace[curr_char], mylocale);
- }
- else
+ int32_t len_uchar, len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ }
+ else
#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
{
- if (wasalnum)
- workspace[curr_char] = towlower(workspace[curr_char]);
- else
- workspace[curr_char] = towupper(workspace[curr_char]);
- wasalnum = iswalnum(workspace[curr_char]);
- }
- }
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
- /* Make result large enough; case change might change number of bytes */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
-#endif /* USE_WIDE_UPPER_LOWER */
- else
- {
-#ifdef HAVE_LOCALE_T
- pg_locale_t mylocale = 0;
-#endif
- char *p;
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- if (collid != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collid))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for initcap() function"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
#ifdef HAVE_LOCALE_T
- mylocale = pg_newlocale_from_collation(collid);
+ if (mylocale)
+ {
+ if (wasalnum)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+ else
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
+ }
+ else
#endif
- }
+ {
+ if (wasalnum)
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ else
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ wasalnum = iswalnum(workspace[curr_char]);
+ }
+ }
- result = pnstrdup(buff, nbytes);
+ /* Make result large enough; case change might change number of bytes */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
- /*
- * Note: we assume that toupper_l()/tolower_l() will not be so broken
- * as to need guard tests. When using the default collation, we apply
- * the traditional Postgres behavior that forces ASCII-style treatment
- * of I/i, but in non-default collations you get exactly what the
- * collation says.
- */
- for (p = result; *p; p++)
- {
-#ifdef HAVE_LOCALE_T
- if (mylocale)
- {
- if (wasalnum)
- *p = tolower_l((unsigned char) *p, mylocale);
- else
- *p = toupper_l((unsigned char) *p, mylocale);
- wasalnum = isalnum_l((unsigned char) *p, mylocale);
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
}
+#endif /* USE_WIDE_UPPER_LOWER */
else
-#endif
{
- if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
- else
- *p = pg_toupper((unsigned char) *p);
- wasalnum = isalnum((unsigned char) *p);
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+
+ /*
+ * Note: we assume that toupper_l()/tolower_l() will not be so broken
+ * as to need guard tests. When using the default collation, we apply
+ * the traditional Postgres behavior that forces ASCII-style treatment
+ * of I/i, but in non-default collations you get exactly what the
+ * collation says.
+ */
+ for (p = result; *p; p++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ {
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+ else
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
+ }
+ else
+#endif
+ {
+ if (wasalnum)
+ *p = pg_tolower((unsigned char) *p);
+ else
+ *p = pg_toupper((unsigned char) *p);
+ wasalnum = isalnum((unsigned char) *p);
+ }
+ }
}
}
}