diff options
Diffstat (limited to 'src/backend/utils/adt/formatting.c')
-rw-r--r-- | src/backend/utils/adt/formatting.c | 453 |
1 files changed, 244 insertions, 209 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index c16bfbca933..0566abd314d 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -82,6 +82,10 @@
 #include <wctype.h>
 #endif
 
+#ifdef USE_ICU
+#include <unicode/ustring.h>
+#endif
+
 #include "catalog/pg_collation.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
@@ -1443,6 +1447,42 @@ str_numth(char *dest, char *num, int type)
  *			upper/lower/initcap functions
  *****************************************************************************/
 
+#ifdef USE_ICU
+static int32_t					/* apply ICU case mapping "func"; result buffer is returned in *buff_dest */
+icu_convert_case(int32_t (*func)(UChar *, int32_t, const UChar *, int32_t, const char *, UErrorCode *),
+				 pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source)
+{
+	UErrorCode	status;
+	int32_t		len_dest;
+
+	len_dest = len_source;		/* try first with same length */
+	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+	status = U_ZERO_ERROR;
+	len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+	if (status == U_BUFFER_OVERFLOW_ERROR)
+	{
+		/* too small: release and re-size the caller's buffer (*buff_dest), then retry */
+		pfree(*buff_dest);
+		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+		status = U_ZERO_ERROR;
+		len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status);
+	}
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("case conversion failed: %s", u_errorName(status))));
+	return len_dest;			/* length reported by func for the converted string */
+}
+
+static int32_t					/* u_strToTitle adapter matching icu_convert_case's callback signature */
+u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+						const UChar *src, int32_t srcLength,
+						const char *locale,
+						UErrorCode *pErrorCode)
+{
+	return u_strToTitle(dest, destCapacity, src, srcLength, NULL, locale, pErrorCode);	/* NULL titleIter */
+}
+#endif
+
 /*
  * If the system provides the needed functions for wide-character manipulation
  * (which are all standardized by C99), then we implement upper/lower/initcap
@@ -1479,12 +1519,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
result = asc_tolower(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1502,77 +1539,79 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar; + int32_t len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToLower, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); - else + if (mylocale) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else #endif - workspace[curr_char] = towlower(workspace[curr_char]); - } + 
workspace[curr_char] = towlower(workspace[curr_char]); + } - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); + } #endif /* USE_WIDE_UPPER_LOWER */ - else - { -#ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; -#endif - char *p; - - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) + else { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for lower() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } -#ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); -#endif - } + char *p; - result = pnstrdup(buff, nbytes); + result = pnstrdup(buff, nbytes); - /* - * Note: we assume that tolower_l() will not be so broken as to need - * an isupper_l() guard test. When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = result; *p; p++) - { + /* + * Note: we assume that tolower_l() will not be so broken as to need + * an isupper_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. 
+ */ + for (p = result; *p; p++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale); - else + if (mylocale) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else #endif - *p = pg_tolower((unsigned char) *p); + *p = pg_tolower((unsigned char) *p); + } + } } } @@ -1599,12 +1638,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) result = asc_toupper(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1622,77 +1658,78 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + { + int32_t len_uchar, len_conv; + UChar *buff_uchar; + UChar *buff_conv; - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToUpper, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + } + else +#endif + { + if (pg_database_encoding_max_length() > 1) + { + wchar_t *workspace; + size_t curr_char; + size_t result_size; - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) - { -#ifdef HAVE_LOCALE_T - if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale); - else -#endif - workspace[curr_char] = 
towupper(workspace[curr_char]); - } + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } -#endif /* USE_WIDE_UPPER_LOWER */ - else - { + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; + if (mylocale) + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + else #endif - char *p; + workspace[curr_char] = towupper(workspace[curr_char]); + } - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for upper() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); + + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); } -#ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); -#endif - } +#endif /* USE_WIDE_UPPER_LOWER */ + else + { + char *p; - result = pnstrdup(buff, nbytes); + result = pnstrdup(buff, nbytes); - /* - * Note: we assume that toupper_l() will not be so broken as to need - * an islower_l() guard test. 
When using the default collation, we - * apply the traditional Postgres behavior that forces ASCII-style - * treatment of I/i, but in non-default collations you get exactly - * what the collation says. - */ - for (p = result; *p; p++) - { + /* + * Note: we assume that toupper_l() will not be so broken as to need + * an islower_l() guard test. When using the default collation, we + * apply the traditional Postgres behavior that forces ASCII-style + * treatment of I/i, but in non-default collations you get exactly + * what the collation says. + */ + for (p = result; *p; p++) + { #ifdef HAVE_LOCALE_T - if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale); - else + if (mylocale) + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + else #endif - *p = pg_toupper((unsigned char) *p); + *p = pg_toupper((unsigned char) *p); + } + } } } @@ -1720,12 +1757,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) result = asc_initcap(buff, nbytes); } #ifdef USE_WIDE_UPPER_LOWER - else if (pg_database_encoding_max_length() > 1) + else { pg_locale_t mylocale = 0; - wchar_t *workspace; - size_t curr_char; - size_t result_size; if (collid != DEFAULT_COLLATION_OID) { @@ -1743,100 +1777,101 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) mylocale = pg_newlocale_from_collation(collid); } - /* Overflow paranoia */ - if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Output workspace cannot have more codes than input bytes */ - workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - - char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); - - for (curr_char = 0; workspace[curr_char] != 0; curr_char++) +#ifdef USE_ICU + if (mylocale && mylocale->provider == COLLPROVIDER_ICU) { -#ifdef HAVE_LOCALE_T - if (mylocale) - { - if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale); - else - workspace[curr_char] = 
towupper_l(workspace[curr_char], mylocale); - wasalnum = iswalnum_l(workspace[curr_char], mylocale); - } - else + int32_t len_uchar, len_conv; + UChar *buff_uchar; + UChar *buff_conv; + + len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); + len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, &buff_conv, buff_uchar, len_uchar); + icu_from_uchar(&result, buff_conv, len_conv); + } + else #endif + { + if (pg_database_encoding_max_length() > 1) { - if (wasalnum) - workspace[curr_char] = towlower(workspace[curr_char]); - else - workspace[curr_char] = towupper(workspace[curr_char]); - wasalnum = iswalnum(workspace[curr_char]); - } - } + wchar_t *workspace; + size_t curr_char; + size_t result_size; - /* Make result large enough; case change might change number of bytes */ - result_size = curr_char * pg_database_encoding_max_length() + 1; - result = palloc(result_size); + /* Overflow paranoia */ + if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t))) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); - wchar2char(result, workspace, result_size, mylocale); - pfree(workspace); - } -#endif /* USE_WIDE_UPPER_LOWER */ - else - { -#ifdef HAVE_LOCALE_T - pg_locale_t mylocale = 0; -#endif - char *p; + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. 
- */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for initcap() function"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } + char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + { #ifdef HAVE_LOCALE_T - mylocale = pg_newlocale_from_collation(collid); + if (mylocale) + { + if (wasalnum) + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + else + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); + } + else #endif - } + { + if (wasalnum) + workspace[curr_char] = towlower(workspace[curr_char]); + else + workspace[curr_char] = towupper(workspace[curr_char]); + wasalnum = iswalnum(workspace[curr_char]); + } + } - result = pnstrdup(buff, nbytes); + /* Make result large enough; case change might change number of bytes */ + result_size = curr_char * pg_database_encoding_max_length() + 1; + result = palloc(result_size); - /* - * Note: we assume that toupper_l()/tolower_l() will not be so broken - * as to need guard tests. When using the default collation, we apply - * the traditional Postgres behavior that forces ASCII-style treatment - * of I/i, but in non-default collations you get exactly what the - * collation says. 
- */ - for (p = result; *p; p++) - { -#ifdef HAVE_LOCALE_T - if (mylocale) - { - if (wasalnum) - *p = tolower_l((unsigned char) *p, mylocale); - else - *p = toupper_l((unsigned char) *p, mylocale); - wasalnum = isalnum_l((unsigned char) *p, mylocale); + wchar2char(result, workspace, result_size, mylocale); + pfree(workspace); } +#endif /* USE_WIDE_UPPER_LOWER */ else -#endif { - if (wasalnum) - *p = pg_tolower((unsigned char) *p); - else - *p = pg_toupper((unsigned char) *p); - wasalnum = isalnum((unsigned char) *p); + char *p; + + result = pnstrdup(buff, nbytes); + + /* + * Note: we assume that toupper_l()/tolower_l() will not be so broken + * as to need guard tests. When using the default collation, we apply + * the traditional Postgres behavior that forces ASCII-style treatment + * of I/i, but in non-default collations you get exactly what the + * collation says. + */ + for (p = result; *p; p++) + { +#ifdef HAVE_LOCALE_T + if (mylocale) + { + if (wasalnum) + *p = tolower_l((unsigned char) *p, mylocale->info.lt); + else + *p = toupper_l((unsigned char) *p, mylocale->info.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); + } + else +#endif + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + wasalnum = isalnum((unsigned char) *p); + } + } } } } |