diff options
Diffstat (limited to 'src/backend/access/hash/hashfunc.c')
-rw-r--r-- | src/backend/access/hash/hashfunc.c | 100 |
1 files changed, 89 insertions, 11 deletions
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index e5f3d42e045..0bf15ae7236 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -27,8 +27,10 @@ #include "postgres.h" #include "access/hash.h" +#include "catalog/pg_collation.h" #include "utils/builtins.h" #include "utils/hashutils.h" +#include "utils/pg_locale.h" /* * Datatype-specific hash functions. @@ -243,15 +245,51 @@ Datum hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; Datum result; - /* - * Note: this is currently identical in behavior to hashvarlena, but keep - * it as a separate function in case we someday want to do something - * different in non-C locales. (See also hashbpchar, if so.) - */ - result = hash_any((unsigned char *) VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key)); + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } /* Avoid leaking memory for toasted inputs */ PG_FREE_IF_COPY(key, 0); @@ -263,12 +301,52 @@ Datum hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; Datum result; - /* Same approach as hashtext */ - result = hash_any_extended((unsigned char *) VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key), - PG_GETARG_INT64(1)); + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any_extended((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + PG_GETARG_INT64(1)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } PG_FREE_IF_COPY(key, 0); |