diff options
author | Jeff Davis <jdavis@postgresql.org> | 2023-02-23 10:55:20 -0800 |
---|---|---|
committer | Jeff Davis <jdavis@postgresql.org> | 2023-02-23 10:55:20 -0800 |
commit | d87d548cd0304477413a73e9c1d148fb2d40b50d (patch) | |
tree | 110613f01e1fc49b20eb95e416227eaf96e469d0 /src/backend/utils/adt/pg_locale.c | |
parent | e9960732a9618d5f744ff43a09622c9185798760 (diff) | |
download | postgresql-d87d548cd0304477413a73e9c1d148fb2d40b50d.tar.gz postgresql-d87d548cd0304477413a73e9c1d148fb2d40b50d.zip |
Refactor to add pg_strcoll(), pg_strxfrm(), and variants.
Offers a generally better separation of responsibilities for collation
code. Also, a step towards multi-lib ICU, which should be based on a
clean separation of the routines required for collation providers.
Callers with NUL-terminated strings should call pg_strcoll() or
pg_strxfrm(); callers with strings and their length should call the
variants pg_strncoll() or pg_strnxfrm().
Reviewed-by: Peter Eisentraut, Peter Geoghegan
Discussion: https://postgr.es/m/a581136455c940d7bd0ff482d3a2bd51af25a94f.camel%40j-davis.com
Diffstat (limited to 'src/backend/utils/adt/pg_locale.c')
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 769 |
1 files changed, 750 insertions, 19 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 059e4fd79f0..ef9efb4a7c9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -79,6 +79,12 @@ #include <shlwapi.h> #endif +/* + * This should be large enough that most strings will fit, but small enough + * that we feel comfortable putting it on the stack + */ +#define TEXTBUFLEN 1024 + #define MAX_L10N_DATA 80 @@ -123,6 +129,19 @@ static char *IsoLocaleName(const char *); #endif #ifdef USE_ICU +/* + * Converter object for converting between ICU's UChar strings and C strings + * in database encoding. Since the database encoding doesn't change, we only + * need one of these per session. + */ +static UConverter *icu_converter = NULL; + +static void init_icu_converter(void); +static size_t uchar_length(UConverter *converter, + const char *str, int32_t len); +static int32_t uchar_convert(UConverter *converter, + UChar *dest, int32_t destlen, + const char *str, int32_t srclen); static void icu_set_collation_attributes(UCollator *collator, const char *loc); #endif @@ -1731,15 +1750,705 @@ get_collation_actual_version(char collprovider, const char *collcollate) return collversion; } +/* + * pg_strncoll_libc_win32_utf8 + * + * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and + * invoke wcscoll() or wcscoll_l(). + */ +#ifdef WIN32 +static int +pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, + size_t len2, pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + char *a1p, + *a2p; + int a1len = len1 * 2 + 2; + int a2len = len2 * 2 + 2; + int r; + int result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(GetDatabaseEncoding() == PG_UTF8); +#ifndef WIN32 + Assert(false); +#endif + + if (a1len + a2len > TEXTBUFLEN) + buf = palloc(a1len + a2len); + + a1p = buf; + a2p = buf + a1len; + + /* API does not work for zero-length input */ + if (len1 == 0) + r = 0; + else + { + r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1, + (LPWSTR) a1p, a1len / 2); + if (!r) + ereport(ERROR, + (errmsg("could not convert string to UTF-16: error code %lu", + GetLastError()))); + } + ((LPWSTR) a1p)[r] = 0; + + if (len2 == 0) + r = 0; + else + { + r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2, + (LPWSTR) a2p, a2len / 2); + if (!r) + ereport(ERROR, + (errmsg("could not convert string to UTF-16: error code %lu", + GetLastError()))); + } + ((LPWSTR) a2p)[r] = 0; + + errno = 0; +#ifdef HAVE_LOCALE_T + if (locale) + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); + else +#endif + result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); + if (result == 2147483647) /* _NLSCMPERROR; missing from mingw + * headers */ + ereport(ERROR, + (errmsg("could not compare Unicode strings: %m"))); + + if (buf != sbuf) + pfree(buf); + + return result; +} +#endif /* WIN32 */ + +/* + * pg_strcoll_libc + * + * Call strcoll(), strcoll_l(), wcscoll(), or wcscoll_l() as appropriate for + * the given locale, platform, and database encoding. If the locale is NULL, + * use the database collation. + * + * Arguments must be encoded in the database encoding and nul-terminated. + */ +static int +pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) +{ + int result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); +#ifdef WIN32 + if (GetDatabaseEncoding() == PG_UTF8) + { + size_t len1 = strlen(arg1); + size_t len2 = strlen(arg2); + result = pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale); + } + else +#endif /* WIN32 */ + if (locale) + { +#ifdef HAVE_LOCALE_T + result = strcoll_l(arg1, arg2, locale->info.lt); +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); +#endif + } + else + result = strcoll(arg1, arg2); + + return result; +} + +/* + * pg_strncoll_libc + * + * Nul-terminate the arguments and call pg_strcoll_libc(). + */ +static int +pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + size_t bufsize1 = len1 + 1; + size_t bufsize2 = len2 + 1; + char *arg1n; + char *arg2n; + int result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + +#ifdef WIN32 + /* check for this case before doing the work for nul-termination */ + if (GetDatabaseEncoding() == PG_UTF8) + return pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale); +#endif /* WIN32 */ + + if (bufsize1 + bufsize2 > TEXTBUFLEN) + buf = palloc(bufsize1 + bufsize2); + + arg1n = buf; + arg2n = buf + bufsize1; + + /* nul-terminate arguments */ + memcpy(arg1n, arg1, len1); + arg1n[len1] = '\0'; + memcpy(arg2n, arg2, len2); + arg2n[len2] = '\0'; + + result = pg_strcoll_libc(arg1n, arg2n, locale); + + if (buf != sbuf) + pfree(buf); + + return result; +} #ifdef USE_ICU + /* - * Converter object for converting between ICU's UChar strings and C strings - * in database encoding. Since the database encoding doesn't change, we only - * need one of these per session. + * pg_strncoll_icu_no_utf8 + * + * Convert the arguments from the database encoding to UChar strings, then + * call ucol_strcoll(). An argument length of -1 means that the string is + * NUL-terminated. + * + * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(), + * caller should call that instead. */ -static UConverter *icu_converter = NULL; +static int +pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1, + const char *arg2, int32_t len2, pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + int32_t ulen1; + int32_t ulen2; + size_t bufsize1; + size_t bufsize2; + UChar *uchar1, + *uchar2; + int result; + + Assert(locale->provider == COLLPROVIDER_ICU); +#ifdef HAVE_UCOL_STRCOLLUTF8 + Assert(GetDatabaseEncoding() != PG_UTF8); +#endif + + init_icu_converter(); + + ulen1 = uchar_length(icu_converter, arg1, len1); + ulen2 = uchar_length(icu_converter, arg2, len2); + + bufsize1 = (ulen1 + 1) * sizeof(UChar); + bufsize2 = (ulen2 + 1) * sizeof(UChar); + + if (bufsize1 + bufsize2 > TEXTBUFLEN) + buf = palloc(bufsize1 + bufsize2); + + uchar1 = (UChar *) buf; + uchar2 = (UChar *) (buf + bufsize1); + + ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1); + ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2); + + result = ucol_strcoll(locale->info.icu.ucol, + uchar1, ulen1, + uchar2, ulen2); + + if (buf != sbuf) + pfree(buf); + + return result; +} + +/* + * pg_strncoll_icu + * + * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given + * database encoding. An argument length of -1 means the string is + * NUL-terminated. + * + * Arguments must be encoded in the database encoding. + */ +static int +pg_strncoll_icu(const char *arg1, int32_t len1, const char *arg2, int32_t len2, + pg_locale_t locale) +{ + int result; + + Assert(locale->provider == COLLPROVIDER_ICU); + +#ifdef HAVE_UCOL_STRCOLLUTF8 + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(locale->info.icu.ucol, + arg1, len1, + arg2, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + } + else +#endif + { + result = pg_strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale); + } + + return result; +} + +#endif /* USE_ICU */ + +/* + * pg_strcoll + * + * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(), + * or wcscoll_l() as appropriate for the given locale, platform, and database + * encoding. If the locale is not specified, use the database collation. + * + * Arguments must be encoded in the database encoding and nul-terminated. + * + * The caller is responsible for breaking ties if the collation is + * deterministic; this maintains consistency with pg_strxfrm(), which cannot + * easily account for deterministic collations. + */ +int +pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) +{ + int result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strcoll_libc(arg1, arg2, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strncoll_icu(arg1, -1, arg2, -1, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +/* + * pg_strncoll + * + * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(), + * or wcscoll_l() as appropriate for the given locale, platform, and database + * encoding. If the locale is not specified, use the database collation. + * + * Arguments must be encoded in the database encoding. + * + * This function may need to nul-terminate the arguments for libc functions; + * so if the caller already has nul-terminated strings, it should call + * pg_strcoll() instead. + * + * The caller is responsible for breaking ties if the collation is + * deterministic; this maintains consistency with pg_strnxfrm(), which cannot + * easily account for deterministic collations. + */ +int +pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, + pg_locale_t locale) +{ + int result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strncoll_libc(arg1, len1, arg2, len2, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strncoll_icu(arg1, len1, arg2, len2, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + + +static size_t +pg_strxfrm_libc(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + +#ifdef TRUST_STRXFRM +#ifdef HAVE_LOCALE_T + if (locale) + return strxfrm_l(dest, src, destsize, locale->info.lt); + else +#endif + return strxfrm(dest, src, destsize); +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); +#endif +} + +static size_t +pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + size_t bufsize = srclen + 1; + size_t result; + + Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + + if (bufsize > TEXTBUFLEN) + buf = palloc(bufsize); + /* nul-terminate arguments */ + memcpy(buf, src, srclen); + buf[srclen] = '\0'; + + result = pg_strxfrm_libc(dest, buf, destsize, locale); + + if (buf != sbuf) + pfree(buf); + + /* if dest is defined, it should be nul-terminated */ + Assert(result >= destsize || dest[result] == '\0'); + + return result; +} + +#ifdef USE_ICU + +/* 'srclen' of -1 means the strings are NUL-terminated */ +static size_t +pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize, + pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + UChar *uchar; + int32_t ulen; + size_t uchar_bsize; + Size result_bsize; + + Assert(locale->provider == COLLPROVIDER_ICU); + + init_icu_converter(); + + ulen = uchar_length(icu_converter, src, srclen); + + uchar_bsize = (ulen + 1) * sizeof(UChar); + + if (uchar_bsize > TEXTBUFLEN) + buf = palloc(uchar_bsize); + + uchar = (UChar *) buf; + + ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); + + result_bsize = ucol_getSortKey(locale->info.icu.ucol, + uchar, ulen, + (uint8_t *) dest, destsize); + + /* + * ucol_getSortKey() counts the nul-terminator in the result length, but + * this function should not. + */ + Assert(result_bsize > 0); + result_bsize--; + + if (buf != sbuf) + pfree(buf); + + /* if dest is defined, it should be nul-terminated */ + Assert(result_bsize >= destsize || dest[result_bsize] == '\0'); + + return result_bsize; +} + +/* 'srclen' of -1 means the strings are NUL-terminated */ +static size_t +pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen, + int32_t destsize, pg_locale_t locale) +{ + char sbuf[TEXTBUFLEN]; + char *buf = sbuf; + UCharIterator iter; + uint32_t state[2]; + UErrorCode status; + int32_t ulen = -1; + UChar *uchar = NULL; + size_t uchar_bsize; + Size result_bsize; + + Assert(locale->provider == COLLPROVIDER_ICU); + Assert(GetDatabaseEncoding() != PG_UTF8); + + init_icu_converter(); + + ulen = uchar_length(icu_converter, src, srclen); + + uchar_bsize = (ulen + 1) * sizeof(UChar); + + if (uchar_bsize > TEXTBUFLEN) + buf = palloc(uchar_bsize); + + uchar = (UChar *) buf; + + ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); + + uiter_setString(&iter, uchar, ulen); + state[0] = state[1] = 0; /* won't need that again */ + status = U_ZERO_ERROR; + result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol, + &iter, + state, + (uint8_t *) dest, + destsize, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("sort key generation failed: %s", + u_errorName(status)))); + + return result_bsize; +} + +/* 'srclen' of -1 means the strings are NUL-terminated */ +static size_t +pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen, + int32_t destsize, pg_locale_t locale) +{ + size_t result; + + Assert(locale->provider == COLLPROVIDER_ICU); + + if (GetDatabaseEncoding() == PG_UTF8) + { + UCharIterator iter; + uint32_t state[2]; + UErrorCode status; + + uiter_setUTF8(&iter, src, srclen); + state[0] = state[1] = 0; /* won't need that again */ + status = U_ZERO_ERROR; + result = ucol_nextSortKeyPart(locale->info.icu.ucol, + &iter, + state, + (uint8_t *) dest, + destsize, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("sort key generation failed: %s", + u_errorName(status)))); + } + else + result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize, + locale); + + return result; +} + +#endif + +/* + * Return true if the collation provider supports pg_strxfrm() and + * pg_strnxfrm(); otherwise false. + * + * Unfortunately, it seems that strxfrm() for non-C collations is broken on + * many common platforms; testing of multiple versions of glibc reveals that, + * for many locales, strcoll() and strxfrm() do not return consistent + * results. While no other libc other than Cygwin has so far been shown to + * have a problem, we take the conservative course of action for right now and + * disable this categorically. (Users who are certain this isn't a problem on + * their system can define TRUST_STRXFRM.) + * + * No similar problem is known for the ICU provider. + */ +bool +pg_strxfrm_enabled(pg_locale_t locale) +{ + if (!locale || locale->provider == COLLPROVIDER_LIBC) +#ifdef TRUST_STRXFRM + return true; +#else + return false; +#endif + else if (locale->provider == COLLPROVIDER_ICU) + return true; + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); +} + +/* + * pg_strxfrm + * + * Transforms 'src' to a nul-terminated string stored in 'dest' such that + * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on + * untransformed strings. + * + * The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest' + * may be NULL. + * + * Returns the number of bytes needed to store the transformed string, + * excluding the terminating nul byte. If the value returned is 'destsize' or + * greater, the resulting contents of 'dest' are undefined. + */ +size_t +pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strxfrm_libc(dest, src, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_icu(dest, src, -1, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +/* + * pg_strnxfrm + * + * Transforms 'src' to a nul-terminated string stored in 'dest' such that + * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on + * untransformed strings. + * + * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may + * be NULL. + * + * Returns the number of bytes needed to store the transformed string, + * excluding the terminating nul byte. If the value returned is 'destsize' or + * greater, the resulting contents of 'dest' are undefined. + * + * This function may need to nul-terminate the argument for libc functions; + * so if the caller already has a nul-terminated string, it should call + * pg_strxfrm() instead. + */ +size_t +pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, + pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_icu(dest, src, srclen, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +/* + * Return true if the collation provider supports pg_strxfrm_prefix() and + * pg_strnxfrm_prefix(); otherwise false. + */ +bool +pg_strxfrm_prefix_enabled(pg_locale_t locale) +{ + if (!locale || locale->provider == COLLPROVIDER_LIBC) + return false; + else if (locale->provider == COLLPROVIDER_ICU) + return true; + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); +} + +/* + * pg_strxfrm_prefix + * + * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary + * memcmp() on the byte sequence is equivalent to pg_strcoll() on + * untransformed strings. The result is not nul-terminated. + * + * The provided 'src' must be nul-terminated. + * + * If destsize is not large enough to hold the resulting byte sequence, stores + * only the first destsize bytes in 'dest'. Returns the number of bytes + * actually copied to 'dest'. + */ +size_t +pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, + pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + elog(ERROR, "collprovider '%c' does not support pg_strxfrm_prefix()", + locale->provider); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +/* + * pg_strnxfrm_prefix + * + * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary + * memcmp() on the byte sequence is equivalent to pg_strcoll() on + * untransformed strings. The result is not nul-terminated. + * + * The provided 'src' must be nul-terminated. + * + * If destsize is not large enough to hold the resulting byte sequence, stores + * only the first destsize bytes in 'dest'. Returns the number of bytes + * actually copied to 'dest'. + * + * This function may need to nul-terminate the argument for libc functions; + * so if the caller already has a nul-terminated string, it should call + * pg_strxfrm_prefix() instead. + */ +size_t +pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, + size_t srclen, pg_locale_t locale) +{ + size_t result; + + if (!locale || locale->provider == COLLPROVIDER_LIBC) + elog(ERROR, "collprovider '%c' does not support pg_strnxfrm_prefix()", + locale->provider); +#ifdef USE_ICU + else if (locale->provider == COLLPROVIDER_ICU) + result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); +#endif + else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", locale->provider); + + return result; +} + +#ifdef USE_ICU static void init_icu_converter(void) { @@ -1768,6 +2477,39 @@ init_icu_converter(void) } /* + * Find length, in UChars, of given string if converted to UChar string. + */ +static size_t +uchar_length(UConverter *converter, const char *str, int32_t len) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t ulen; + ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status); + if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) + ereport(ERROR, + (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); + return ulen; +} + +/* + * Convert the given source string into a UChar string, stored in dest, and + * return the length (in UChars). + */ +static int32_t +uchar_convert(UConverter *converter, UChar *dest, int32_t destlen, + const char *src, int32_t srclen) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t ulen; + status = U_ZERO_ERROR; + ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); + return ulen; +} + +/* * Convert a string in the database encoding into a string of UChars. * * The source string at buff is of length nbytes @@ -1782,26 +2524,15 @@ init_icu_converter(void) int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) { - UErrorCode status; - int32_t len_uchar; + int32_t len_uchar; init_icu_converter(); - status = U_ZERO_ERROR; - len_uchar = ucnv_toUChars(icu_converter, NULL, 0, - buff, nbytes, &status); - if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) - ereport(ERROR, - (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); + len_uchar = uchar_length(icu_converter, buff, nbytes); *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar)); - - status = U_ZERO_ERROR; - len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1, - buff, nbytes, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); + len_uchar = uchar_convert(icu_converter, + *buff_uchar, len_uchar + 1, buff, nbytes); return len_uchar; } |