about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2009-05-24 18:10:38 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2009-05-24 18:10:38 +0000
commit: fc2660fc253ce871b2f26e7b6dce133092357cb0 (patch)
tree: 65635eab224dcd5421891194e1b580fb4b9e9a39 /src
parent: c6c458e24dd225e719ac289efdee50b2d6d1d16a (diff)
download: postgresql-fc2660fc253ce871b2f26e7b6dce133092357cb0.tar.gz
download: postgresql-fc2660fc253ce871b2f26e7b6dce133092357cb0.zip
Fix LIKE's special-case code for % followed by _. I'm not entirely sure that
this case is worth a special code path, but a special code path that gets the
boundary condition wrong is definitely no good. Per bug #4821 from Andrew
Gierth.

In passing, clean up some minor code formatting issues (excess parentheses and
blank lines in odd places).

Back-patch to 8.3, where the bug was introduced.
Diffstat (limited to 'src')
-rw-r--r--  src/backend/utils/adt/like_match.c     120
-rw-r--r--  src/test/regress/expected/strings.out   27
-rw-r--r--  src/test/regress/sql/strings.sql        11
3 files changed, 96 insertions, 62 deletions
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
index 95550e0530e..d72113b66b3 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -9,7 +9,7 @@
* (UTF8 is a special case because we can use a much more efficient version
* of NextChar than can be used for general multi-byte encodings.)
*
- * Before the inclusion, we need to define following macros:
+ * Before the inclusion, we need to define the following macros:
*
* NextChar
* MatchText - to name of function wanted
@@ -19,47 +19,46 @@
* Copyright (c) 1996-2009, PostgreSQL Global Development Group
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.24 2009/01/01 17:23:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.25 2009/05/24 18:10:37 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
-** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
-** Rich $alz is now <rsalz@bbn.com>.
-** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
-**
-** This code was shamelessly stolen from the "pql" code by myself and
-** slightly modified :)
-**
-** All references to the word "star" were replaced by "percent"
-** All references to the word "wild" were replaced by "like"
-**
-** All the nice shell RE matching stuff was replaced by just "_" and "%"
-**
-** As I don't have a copy of the SQL standard handy I wasn't sure whether
-** to leave in the '\' escape character handling.
-**
-** Keith Parks. <keith@mtcc.demon.co.uk>
-**
-** SQL92 lets you specify the escape character by saying
-** LIKE <pattern> ESCAPE <escape character>. We are a small operation
-** so we force you to use '\'. - ay 7/95
-**
-** Now we have the like_escape() function that converts patterns with
-** any specified escape character (or none at all) to the internal
-** default escape character, which is still '\'. - tgl 9/2000
-**
-** The code is rewritten to avoid requiring null-terminated strings,
-** which in turn allows us to leave out some memcpy() operations.
-** This code should be faster and take less memory, but no promises...
-** - thomas 2000-08-06
-**
-*/
+ * Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+ * Rich $alz is now <rsalz@bbn.com>.
+ * Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
+ *
+ * This code was shamelessly stolen from the "pql" code by myself and
+ * slightly modified :)
+ *
+ * All references to the word "star" were replaced by "percent"
+ * All references to the word "wild" were replaced by "like"
+ *
+ * All the nice shell RE matching stuff was replaced by just "_" and "%"
+ *
+ * As I don't have a copy of the SQL standard handy I wasn't sure whether
+ * to leave in the '\' escape character handling.
+ *
+ * Keith Parks. <keith@mtcc.demon.co.uk>
+ *
+ * SQL92 lets you specify the escape character by saying
+ * LIKE <pattern> ESCAPE <escape character>. We are a small operation
+ * so we force you to use '\'. - ay 7/95
+ *
+ * Now we have the like_escape() function that converts patterns with
+ * any specified escape character (or none at all) to the internal
+ * default escape character, which is still '\'. - tgl 9/2000
+ *
+ * The code is rewritten to avoid requiring null-terminated strings,
+ * which in turn allows us to leave out some memcpy() operations.
+ * This code should be faster and take less memory, but no promises...
+ * - thomas 2000-08-06
+ */
/*--------------------
- * Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
+ * Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
*
* LIKE_TRUE: they match
* LIKE_FALSE: they don't match
@@ -80,19 +79,18 @@ static int
MatchText(char *t, int tlen, char *p, int plen)
{
/* Fast path for match-everything pattern */
- if ((plen == 1) && (*p == '%'))
+ if (plen == 1 && *p == '%')
return LIKE_TRUE;
/*
* In this loop, we advance by char when matching wildcards (and thus on
* recursive entry to this function we are properly char-synced). On other
* occasions it is safe to advance by byte, as the text and pattern will
- * be in lockstep. This allows us to perform all comparisons between the
+ * be in lockstep. This allows us to perform all comparisons between the
* text and pattern on a byte by byte basis, even for multi-byte
* encodings.
*/
-
- while ((tlen > 0) && (plen > 0))
+ while (tlen > 0 && plen > 0)
{
if (*p == '\\')
{
@@ -116,7 +114,7 @@ MatchText(char *t, int tlen, char *p, int plen)
/* %% is the same as % according to the SQL standard */
/* Advance past all %'s */
- while ((plen > 0) && (*p == '%'))
+ while (plen > 0 && *p == '%')
NextByte(p, plen);
/* Trailing percent matches everything. */
if (plen <= 0)
@@ -127,22 +125,24 @@ MatchText(char *t, int tlen, char *p, int plen)
* rest of the pattern.
*/
if (*p == '_')
-
{
/* %_ is the same as _% - avoid matching _ repeatedly */
- NextChar(t, tlen);
- NextByte(p, plen);
-
- if (tlen <= 0)
- {
- return (plen <= 0) ? LIKE_TRUE : LIKE_ABORT;
- }
- else if (plen <= 0)
+ do
{
- return LIKE_FALSE;
- }
-
+ NextChar(t, tlen);
+ NextByte(p, plen);
+ } while (tlen > 0 && plen > 0 && *p == '_');
+
+ /*
+ * If we are at the end of the pattern, succeed: % followed
+ * by n _'s matches any string of at least n characters, and
+ * we have now found there are at least n characters.
+ */
+ if (plen <= 0)
+ return LIKE_TRUE;
+
+ /* Look for a place that matches the rest of the pattern */
while (tlen > 0)
{
int matched = MatchText(t, tlen, p, plen);
@@ -155,7 +155,6 @@ MatchText(char *t, int tlen, char *p, int plen)
}
else
{
-
char firstpat = TCHAR(*p);
if (*p == '\\')
@@ -180,7 +179,6 @@ MatchText(char *t, int tlen, char *p, int plen)
}
NextChar(t, tlen);
-
}
}
@@ -192,20 +190,20 @@ MatchText(char *t, int tlen, char *p, int plen)
}
else if (*p == '_')
{
+ /* _ matches any single character, and we know there is one */
NextChar(t, tlen);
NextByte(p, plen);
continue;
}
- else if (TCHAR(*t) != TCHAR(*p))
+ else if (TCHAR(*p) != TCHAR(*t))
{
- /*
- * Not the single-character wildcard and no explicit match? Then
- * time to quit...
- */
+ /* non-wildcard pattern char fails to match text char */
return LIKE_FALSE;
}
/*
+ * Pattern and text match, so advance.
+ *
* It is safe to use NextByte instead of NextChar here, even for
* multi-byte character sets, because we are not following immediately
* after a wildcard character. If we are in the middle of a multibyte
@@ -222,9 +220,8 @@ MatchText(char *t, int tlen, char *p, int plen)
if (tlen > 0)
return LIKE_FALSE; /* end of pattern, but not of text */
- /* End of input string. Do we have matching pattern remaining? */
- while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of
- * pattern */
+ /* End of text string. Do we have matching pattern remaining? */
+ while (plen > 0 && *p == '%') /* allow multiple %'s at end of pattern */
NextByte(p, plen);
if (plen <= 0)
@@ -354,5 +351,4 @@ do_like_escape(text *pat, text *esc)
#ifdef MATCH_LOWER
#undef MATCH_LOWER
-
#endif
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index be8eb919fa3..1241a2ace63 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -903,6 +903,33 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
(1 row)
--
+-- test %/_ combination cases, cf bug #4821
+--
+SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
+ t | t | f
+---+---+---
+ t | t | f
+(1 row)
+
+SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
+ t | t | f
+---+---+---
+ t | t | f
+(1 row)
+
+SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
+ t | t | f
+---+---+---
+ t | t | f
+(1 row)
+
+SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
+ t | t | f
+---+---+---
+ t | t | f
+(1 row)
+
+--
-- test implicit type conversion
--
-- E021-07 character concatenation
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index a28c75ac044..681a0e1e62c 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -280,6 +280,17 @@ SELECT 'Hawkeye' ILIKE 'h%' AS "true";
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
--
+-- test %/_ combination cases, cf bug #4821
+--
+
+SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
+SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
+
+SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
+SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
+
+
+--
-- test implicit type conversion
--