aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2004-02-02 03:07:25 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2004-02-02 03:07:25 +0000
commit: a60442a44656214e5370dd40b76715e9c348231e (patch)
tree: c71c026600cdeba5dc4749b5e6ee123d7172c1f1 /src/backend
parent: b59917f02e3ccb025d393b1e38acc935033f07fd (diff)
download: postgresql-a60442a44656214e5370dd40b76715e9c348231e.tar.gz
download: postgresql-a60442a44656214e5370dd40b76715e9c348231e.zip
Avoid generating invalid character encoding sequences in make_greater_string.
Not sure how this mistake evaded detection for so long.
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/utils/adt/selfuncs.c 93
1 files changed, 23 insertions, 70 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index f5a358ae9ec..d00f6addb3f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.119.2.7 2003/12/07 04:11:26 joe Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.119.2.8 2004/02/02 03:07:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -173,8 +173,6 @@ static void get_join_vars(List *args, Var **var1, Var **var2);
static Selectivity prefix_selectivity(Query *root, Var *var, Oid vartype,
Const *prefix);
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
-static bool string_lessthan(const char *str1, const char *str2,
- Oid datatype);
static Oid find_operator(const char *opname, Oid datatype);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
@@ -3404,23 +3402,22 @@ locale_is_like_safe(void)
* string it is a prefix of. If successful, return a palloc'd string
* in the form of a Const pointer; else return NULL.
*
- * To work correctly in non-ASCII locales with weird collation orders,
- * we cannot simply increment "foo" to "fop" --- we have to check whether
- * we actually produced a string greater than the given one. If not,
- * increment the righthand byte again and repeat. If we max out the righthand
- * byte, truncate off the last character and start incrementing the next.
- * For example, if "z" were the last character in the sort order, then we
- * could produce "foo" as a string greater than "fonz".
+ * The key requirement here is that given a prefix string, say "foo",
+ * we must be able to generate another string "fop" that is greater
+ * than all strings "foobar" starting with "foo".
+ *
+ * If we max out the righthand byte, truncate off the last character
+ * and start incrementing the next. For example, if "z" were the last
+ * character in the sort order, then we could produce "foo" as a
+ * string greater than "fonz".
*
- * This could be rather slow in the worst case, but in most cases we won't
- * have to try more than one or two strings before succeeding.
+ * This could be rather slow in the worst case, but in most cases we
+ * won't have to try more than one or two strings before succeeding.
*
- * XXX this is actually not sufficient, since it only copes with the case
- * where individual characters collate in an order different from their
- * numeric code assignments. It does not handle cases where there are
- * cross-character effects, such as specially sorted digraphs, multiple
- * sort passes, etc. For now, we just shut down the whole thing in locales
- * that do such things :-(
+ * NOTE: at present this assumes we are in the C locale, so that simple
+ * bytewise comparison applies. However, we might be in a multibyte
+ * encoding such as UTF-8, so we do have to watch out for generating
+ * invalid encoding sequences.
*/
Const *
make_greater_string(const Const *str_const)
@@ -3467,13 +3464,20 @@ make_greater_string(const Const *str_const)
/*
* Try to generate a larger string by incrementing the last byte.
*/
- if (*lastchar < (unsigned char) 255)
+ while (*lastchar < (unsigned char) 255)
{
Const *workstr_const;
(*lastchar)++;
+
if (datatype != BYTEAOID)
+ {
+ /* do not generate invalid encoding sequences */
+ if (pg_verifymbstr((const unsigned char *) workstr,
+ len) != NULL)
+ continue;
workstr_const = string_to_const(workstr, datatype);
+ }
else
workstr_const = string_to_bytea_const(workstr, len);
@@ -3504,57 +3508,6 @@ make_greater_string(const Const *str_const)
return (Const *) NULL;
}
-/*
- * Test whether two strings are "<" according to the rules of the given
- * datatype. We do this the hard way, ie, actually calling the type's
- * "<" operator function, to ensure we get the right result...
- */
-static bool
-string_lessthan(const char *str1, const char *str2, Oid datatype)
-{
- Datum datum1 = string_to_datum(str1, datatype);
- Datum datum2 = string_to_datum(str2, datatype);
- bool result;
-
- switch (datatype)
- {
- case TEXTOID:
- result = DatumGetBool(DirectFunctionCall2(text_lt,
- datum1, datum2));
- break;
-
- case BPCHAROID:
- result = DatumGetBool(DirectFunctionCall2(bpcharlt,
- datum1, datum2));
- break;
-
- case VARCHAROID:
- result = DatumGetBool(DirectFunctionCall2(varcharlt,
- datum1, datum2));
- break;
-
- case NAMEOID:
- result = DatumGetBool(DirectFunctionCall2(namelt,
- datum1, datum2));
- break;
-
- case BYTEAOID:
- result = DatumGetBool(DirectFunctionCall2(bytealt,
- datum1, datum2));
- break;
-
- default:
- elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
- result = false;
- break;
- }
-
- pfree(DatumGetPointer(datum1));
- pfree(DatumGetPointer(datum2));
-
- return result;
-}
-
/* See if there is a binary op of the given name for the given datatype */
/* NB: we assume that only built-in system operators are searched for */
static Oid