aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2005-12-22 22:50:07 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2005-12-22 22:50:07 +0000
commit: e6242ba176278ff02af1bb2069a32fc83c176869 (patch)
tree: 87b4b95b9aba3abcbd13c1947bfd8a081a71ed19 /src
parent: f545a05704e5148fec1f373820603cc7be73e7dd (diff)
download: postgresql-e6242ba176278ff02af1bb2069a32fc83c176869.tar.gz
          postgresql-e6242ba176278ff02af1bb2069a32fc83c176869.zip
Adjust string comparison so that only bitwise-equal strings are considered equal: if strcoll() claims two strings are equal, check it with strcmp(), and sort according to strcmp() if they are not identical. This fixes inconsistent behavior under glibc's hu_HU locale, and probably under some other locales as well. Also, take advantage of the now-well-defined behavior to speed up texteq, textne, bpchareq, and bpcharne: they may as well just do a bitwise comparison and not bother with strcoll() at all. NOTE: affected databases may need to REINDEX indexes on text columns to be sure they are self-consistent.
Diffstat (limited to 'src')
-rw-r--r--  src/backend/access/hash/hashfunc.c   8
-rw-r--r--  src/backend/utils/adt/varchar.c     20
-rw-r--r--  src/backend/utils/adt/varlena.c     27
3 files changed, 40 insertions, 15 deletions
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 2ffca5efe6a..a4803708ad4 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.45 2005/10/15 02:49:08 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.45.2.1 2005/12/22 22:50:06 tgl Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
@@ -138,9 +138,9 @@ hashtext(PG_FUNCTION_ARGS)
Datum result;
/*
- * Note: this is currently identical in behavior to hashvarlena, but it
- * seems likely that we may need to do something different in non-C
- * locales. (See also hashbpchar, if so.)
+ * Note: this is currently identical in behavior to hashvarlena, but
+ * keep it as a separate function in case we someday want to do something
+ * different in non-C locales. (See also hashbpchar, if so.)
*/
result = hash_any((unsigned char *) VARDATA(key),
VARSIZE(key) - VARHDRSZ);
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 1377e7cc6d2..006b60ada0f 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.113 2005/10/15 02:49:30 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.113.2.1 2005/12/22 22:50:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -614,11 +614,14 @@ bpchareq(PG_FUNCTION_ARGS)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
- /* fast path for different-length inputs */
+ /*
+ * Since we only care about equality or not-equality, we can avoid all
+ * the expense of strcoll() here, and just do bitwise comparison.
+ */
if (len1 != len2)
result = false;
else
- result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) == 0);
+ result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -638,11 +641,14 @@ bpcharne(PG_FUNCTION_ARGS)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
- /* fast path for different-length inputs */
+ /*
+ * Since we only care about equality or not-equality, we can avoid all
+ * the expense of strcoll() here, and just do bitwise comparison.
+ */
if (len1 != len2)
result = true;
else
- result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) != 0);
+ result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -789,7 +795,9 @@ bpchar_smaller(PG_FUNCTION_ARGS)
* bpchar needs a specialized hash function because we want to ignore
* trailing blanks in comparisons.
*
- * XXX is there any need for locale-specific behavior here?
+ * Note: currently there is no need for locale-specific behavior here,
+ * but if we ever change the semantics of bpchar comparison to trust
+ * strcoll() completely, we'd need to do something different in non-C locales.
*/
Datum
hashbpchar(PG_FUNCTION_ARGS)
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 63e5d6b8dd0..e6837ebe476 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.139.2.1 2005/11/22 18:23:22 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.139.2.2 2005/12/22 22:50:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -938,6 +938,15 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
result = strcoll(a1p, a2p);
+ /*
+ * In some locales strcoll() can claim that nonidentical strings are
+ * equal. Believing that would be bad news for a number of reasons,
+ * so we follow Perl's lead and sort "equal" strings according to
+ * strcmp().
+ */
+ if (result == 0)
+ result = strcmp(a1p, a2p);
+
if (a1p != a1buf)
pfree(a1p);
if (a2p != a2buf)
@@ -984,11 +993,15 @@ texteq(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_P(1);
bool result;
- /* fast path for different-length inputs */
+ /*
+ * Since we only care about equality or not-equality, we can avoid all
+ * the expense of strcoll() here, and just do bitwise comparison.
+ */
if (VARSIZE(arg1) != VARSIZE(arg2))
result = false;
else
- result = (text_cmp(arg1, arg2) == 0);
+ result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+ VARSIZE(arg1) - VARHDRSZ) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
@@ -1003,11 +1016,15 @@ textne(PG_FUNCTION_ARGS)
text *arg2 = PG_GETARG_TEXT_P(1);
bool result;
- /* fast path for different-length inputs */
+ /*
+ * Since we only care about equality or not-equality, we can avoid all
+ * the expense of strcoll() here, and just do bitwise comparison.
+ */
if (VARSIZE(arg1) != VARSIZE(arg2))
result = true;
else
- result = (text_cmp(arg1, arg2) != 0);
+ result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+ VARSIZE(arg1) - VARHDRSZ) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);