aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/like_support.c
diff options
context:
space:
mode:
author Peter Eisentraut <peter@eisentraut.org> 2024-11-27 08:18:35 +0100
committer Peter Eisentraut <peter@eisentraut.org> 2024-11-27 08:19:42 +0100
commit 85b7efa1cdd63c2fe2b70b725b8285743ee5787f (patch)
tree 812b8d1f7a41163284043e4c53f5949daec7f37c /src/backend/utils/adt/like_support.c
parent 8fcd80258bcf43dab93d877a5de0ce3f4d2bd471 (diff)
downloadpostgresql-85b7efa1cdd63c2fe2b70b725b8285743ee5787f.tar.gz
postgresql-85b7efa1cdd63c2fe2b70b725b8285743ee5787f.zip
Support LIKE with nondeterministic collations
This allows for example using LIKE with case-insensitive collations. There was previously no internal implementation of this, so it was met with a not-supported error. This adds the internal implementation and removes the error. The implementation follows the specification of the SQL standard for this. Unlike with deterministic collations, the LIKE matching cannot go character by character but has to go substring by substring. For example, if we are matching against LIKE 'foo%bar', we can't start by looking for an 'f', then an 'o', but instead we have to find something that matches 'foo'. This is because the collation could consider substrings of different lengths to be equal. This is all internal to MatchText() in like_match.c. The changes in GenericMatchText() in like.c just pass through the locale information to MatchText(), which was previously not needed. This matches exactly Generic_Text_IC_like() below. ILIKE is not affected. (It's unclear whether ILIKE makes sense under nondeterministic collations.) This also updates match_pattern_prefix() in like_support.c to support optimizing the case of an exact pattern with nondeterministic collations. This was already alluded to in the previous code. (includes documentation examples from Daniel Vérité and test cases from Paul A Jungwirth) Reviewed-by: Jian He <jian.universality@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/700d2e86-bf75-4607-9cf2-f5b7802f6e88@eisentraut.org
Diffstat (limited to 'src/backend/utils/adt/like_support.c')
-rw-r--r-- src/backend/utils/adt/like_support.c | 29
1 files changed, 13 insertions, 16 deletions
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 8b15509a3bf..ee71ca89ffd 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -273,22 +273,6 @@ match_pattern_prefix(Node *leftop,
patt = (Const *) rightop;
/*
- * Not supported if the expression collation is nondeterministic. The
- * optimized equality or prefix tests use bytewise comparisons, which is
- * not consistent with nondeterministic collations. The actual
- * pattern-matching implementation functions will later error out that
- * pattern-matching is not supported with nondeterministic collations. (We
- * could also error out here, but by doing it later we get more precise
- * error messages.) (It should be possible to support at least
- * Pattern_Prefix_Exact, but no point as long as the actual
- * pattern-matching implementations don't support it.)
- *
- * expr_coll is not set for a non-collation-aware data type such as bytea.
- */
- if (expr_coll && !get_collation_isdeterministic(expr_coll))
- return NIL;
-
- /*
* Try to extract a fixed prefix from the pattern.
*/
pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
@@ -404,6 +388,8 @@ match_pattern_prefix(Node *leftop,
{
if (!op_in_opfamily(eqopr, opfamily))
return NIL;
+ if (indexcollation != expr_coll)
+ return NIL;
expr = make_opclause(eqopr, BOOLOID, false,
(Expr *) leftop, (Expr *) prefix,
InvalidOid, indexcollation);
@@ -412,6 +398,17 @@ match_pattern_prefix(Node *leftop,
}
/*
+ * Anything other than Pattern_Prefix_Exact is not supported if the
+ * expression collation is nondeterministic. The optimized equality or
+ * prefix tests use bytewise comparisons, which is not consistent with
+ * nondeterministic collations.
+ *
+ * expr_coll is not set for a non-collation-aware data type such as bytea.
+ */
+ if (expr_coll && !get_collation_isdeterministic(expr_coll))
+ return NIL;
+
+ /*
* Otherwise, we have a nonempty required prefix of the values. Some
* opclasses support prefix checks directly, otherwise we'll try to
* generate a range constraint.