diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2000-09-15 18:45:31 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2000-09-15 18:45:31 +0000 |
commit | 8ae9ad1cb8cfcaa929322ee3df48094f5ffd96e9 (patch) | |
tree | e72ebab2801d58150f1ce72f488e005ab23854ba /src/backend/utils/adt/like.c | |
parent | 148f905f4185bbad191bceba5e7a173b38e7d3ba (diff) | |
download | postgresql-8ae9ad1cb8cfcaa929322ee3df48094f5ffd96e9.tar.gz postgresql-8ae9ad1cb8cfcaa929322ee3df48094f5ffd96e9.zip |
Reimplement LIKE/ESCAPE as operators so that indexscan optimization
can still work, per recent discussion on pghackers. Correct some bugs
in ILIKE implementation.
Diffstat (limited to 'src/backend/utils/adt/like.c')
-rw-r--r-- | src/backend/utils/adt/like.c | 491 |
1 files changed, 207 insertions, 284 deletions
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index e492c58cbad..41e86648b87 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -11,12 +11,14 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.41 2000/08/22 06:33:57 ishii Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.42 2000/09/15 18:45:26 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" + #include <ctype.h> + #ifdef MULTIBYTE #include "mb/pg_wchar.h" #endif @@ -28,92 +30,129 @@ #define LIKE_ABORT (-1) -static int MatchText(unsigned char * t, int tlen, unsigned char * p, int plen, char *e); -static int MatchTextLower(unsigned char * t, int tlen, unsigned char * p, int plen, char *e); +static int MatchText(unsigned char * t, int tlen, + unsigned char * p, int plen); +static int MatchTextIC(unsigned char * t, int tlen, + unsigned char * p, int plen); -/* - * interface routines called by the function manager +#ifdef MULTIBYTE +/*-------------------- + * Support routine for MatchText. Compares given multibyte streams + * as wide characters. If they match, returns 1 otherwise returns 0. + *-------------------- */ - -Datum -namelike(PG_FUNCTION_ARGS) +static int wchareq(unsigned char *p1, unsigned char *p2) { - bool result; - Name str = PG_GETARG_NAME(0); - text *pat = PG_GETARG_TEXT_P(1); - unsigned char *s, *p; - int slen, plen; + int l; - s = NameStr(*str); - slen = strlen(s); - p = VARDATA(pat); - plen = (VARSIZE(pat)-VARHDRSZ); + l = pg_mblen(p1); + if (pg_mblen(p2) != l) { + return(0); + } + while (l--) { + if (*p1++ != *p2++) + return(0); + } + return(1); +} - result = (MatchText(s, slen, p, plen, "\\") == LIKE_TRUE); +/*-------------------- + * Support routine for MatchTextIC. Compares given multibyte streams + * as wide characters ignoring case. + * If they match, returns 1 otherwise returns 0. + *-------------------- + */ +#define UCHARMAX 0xff - PG_RETURN_BOOL(result); +static int iwchareq(unsigned char *p1, unsigned char *p2) +{ + int c1, c2; + int l; + + /* short cut. if *p1 and *p2 is lower than UCHARMAX, then + we assume they are ASCII */ + if (*p1 < UCHARMAX && *p2 < UCHARMAX) + return(tolower(*p1) == tolower(*p2)); + + if (*p1 < UCHARMAX) + c1 = tolower(*p1); + else + { + l = pg_mblen(p1); + (void)pg_mb2wchar_with_len(p1, (pg_wchar *)&c1, l); + c1 = tolower(c1); + } + if (*p2 < UCHARMAX) + c2 = tolower(*p2); + else + { + l = pg_mblen(p2); + (void)pg_mb2wchar_with_len(p2, (pg_wchar *)&c2, l); + c2 = tolower(c2); + } + return(c1 == c2); } -Datum -namenlike(PG_FUNCTION_ARGS) -{ - bool result; - Name str = PG_GETARG_NAME(0); - text *pat = PG_GETARG_TEXT_P(1); - unsigned char *s, *p; - int slen, plen; +#endif - s = NameStr(*str); - slen = strlen(s); - p = VARDATA(pat); - plen = (VARSIZE(pat)-VARHDRSZ); +#ifdef MULTIBYTE +#define CHAREQ(p1, p2) wchareq(p1, p2) +#define ICHAREQ(p1, p2) iwchareq(p1, p2) +#define NextChar(p, plen) \ + do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) +#define CopyAdvChar(dst, src, srclen) \ + do { int __l = pg_mblen(src); \ + (srclen) -= __l; \ + while (__l-- > 0) \ + *(dst)++ = *(src)++; \ + } while (0) +#else +#define CHAREQ(p1, p2) (*(p1) == *(p2)) +#define ICHAREQ(p1, p2) (tolower(*(p1)) == tolower(*(p2))) +#define NextChar(p, plen) ((p)++, (plen)--) +#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) +#endif - result = (MatchText(s, slen, p, plen, "\\") != LIKE_TRUE); - PG_RETURN_BOOL(result); -} +/* + * interface routines called by the function manager + */ Datum -namelike_escape(PG_FUNCTION_ARGS) +namelike(PG_FUNCTION_ARGS) { - bool result; Name str = PG_GETARG_NAME(0); text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); + bool result; unsigned char *s, *p; int slen, plen; - char *e; s = NameStr(*str); slen = strlen(s); p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); - result = (MatchText(s, slen, p, plen, e) == LIKE_TRUE); + result = (MatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum -namenlike_escape(PG_FUNCTION_ARGS) +namenlike(PG_FUNCTION_ARGS) { - bool result; Name str = PG_GETARG_NAME(0); text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); + bool result; unsigned char *s, *p; int slen, plen; - char *e; s = NameStr(*str); slen = strlen(s); p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); - result = (MatchText(s, slen, p, plen, e) != LIKE_TRUE); + result = (MatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -121,28 +160,9 @@ namenlike_escape(PG_FUNCTION_ARGS) Datum textlike(PG_FUNCTION_ARGS) { - bool result; text *str = PG_GETARG_TEXT_P(0); text *pat = PG_GETARG_TEXT_P(1); - unsigned char *s, *p; - int slen, plen; - - s = VARDATA(str); - slen = (VARSIZE(str)-VARHDRSZ); - p = VARDATA(pat); - plen = (VARSIZE(pat)-VARHDRSZ); - - result = (MatchText(s, slen, p, plen, NULL) == LIKE_TRUE); - - PG_RETURN_BOOL(result); -} - -Datum -textnlike(PG_FUNCTION_ARGS) -{ bool result; - text *str = PG_GETARG_TEXT_P(0); - text *pat = PG_GETARG_TEXT_P(1); unsigned char *s, *p; int slen, plen; @@ -151,51 +171,26 @@ textnlike(PG_FUNCTION_ARGS) p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - result = (MatchText(s, slen, p, plen, "\\") != LIKE_TRUE); + result = (MatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum -textlike_escape(PG_FUNCTION_ARGS) +textnlike(PG_FUNCTION_ARGS) { - bool result; text *str = PG_GETARG_TEXT_P(0); text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); - unsigned char *s, *p; - int slen, plen; - char *e; - - s = VARDATA(str); - slen = (VARSIZE(str)-VARHDRSZ); - p = VARDATA(pat); - plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); - - result = (MatchText(s, slen, p, plen, e) == LIKE_TRUE); - - PG_RETURN_BOOL(result); -} - -Datum -textnlike_escape(PG_FUNCTION_ARGS) -{ bool result; - text *str = PG_GETARG_TEXT_P(0); - text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); unsigned char *s, *p; int slen, plen; - char *e; s = VARDATA(str); slen = (VARSIZE(str)-VARHDRSZ); p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); - result = (MatchText(s, slen, p, plen, e) != LIKE_TRUE); + result = (MatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -205,30 +200,11 @@ textnlike_escape(PG_FUNCTION_ARGS) */ Datum -inamelike(PG_FUNCTION_ARGS) +nameiclike(PG_FUNCTION_ARGS) { - bool result; Name str = PG_GETARG_NAME(0); text *pat = PG_GETARG_TEXT_P(1); - unsigned char *s, *p; - int slen, plen; - - s = NameStr(*str); - slen = strlen(s); - p = VARDATA(pat); - plen = (VARSIZE(pat)-VARHDRSZ); - - result = (MatchTextLower(s, slen, p, plen, "\\") == LIKE_TRUE); - - PG_RETURN_BOOL(result); -} - -Datum -inamenlike(PG_FUNCTION_ARGS) -{ bool result; - Name str = PG_GETARG_NAME(0); - text *pat = PG_GETARG_TEXT_P(1); unsigned char *s, *p; int slen, plen; @@ -237,61 +213,36 @@ inamenlike(PG_FUNCTION_ARGS) p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - result = (MatchTextLower(s, slen, p, plen, "\\") != LIKE_TRUE); + result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum -inamelike_escape(PG_FUNCTION_ARGS) +nameicnlike(PG_FUNCTION_ARGS) { - bool result; Name str = PG_GETARG_NAME(0); text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); - unsigned char *s, *p; - int slen, plen; - char *e; - - s = NameStr(*str); - slen = strlen(s); - p = VARDATA(pat); - plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); - - result = (MatchTextLower(s, slen, p, plen, e) == LIKE_TRUE); - - PG_RETURN_BOOL(result); -} - -Datum -inamenlike_escape(PG_FUNCTION_ARGS) -{ bool result; - Name str = PG_GETARG_NAME(0); - text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); unsigned char *s, *p; int slen, plen; - char *e; s = NameStr(*str); slen = strlen(s); p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); - result = (MatchTextLower(s, slen, p, plen, e) != LIKE_TRUE); + result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } Datum -itextlike(PG_FUNCTION_ARGS) +texticlike(PG_FUNCTION_ARGS) { - bool result; text *str = PG_GETARG_TEXT_P(0); text *pat = PG_GETARG_TEXT_P(1); + bool result; unsigned char *s, *p; int slen, plen; @@ -300,17 +251,17 @@ itextlike(PG_FUNCTION_ARGS) p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - result = (MatchTextLower(s, slen, p, plen, "\\") == LIKE_TRUE); + result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum -itextnlike(PG_FUNCTION_ARGS) +texticnlike(PG_FUNCTION_ARGS) { - bool result; text *str = PG_GETARG_TEXT_P(0); text *pat = PG_GETARG_TEXT_P(1); + bool result; unsigned char *s, *p; int slen, plen; @@ -319,53 +270,100 @@ itextnlike(PG_FUNCTION_ARGS) p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - result = (MatchTextLower(s, slen, p, plen, "\\") != LIKE_TRUE); + result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } +/* + * like_escape() --- given a pattern and an ESCAPE string, + * convert the pattern to use Postgres' standard backslash escape convention. + */ Datum -itextlike_escape(PG_FUNCTION_ARGS) +like_escape(PG_FUNCTION_ARGS) { - bool result; - text *str = PG_GETARG_TEXT_P(0); - text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); - unsigned char *s, *p; - int slen, plen; - char *e; + text *pat = PG_GETARG_TEXT_P(0); + text *esc = PG_GETARG_TEXT_P(1); + text *result; + unsigned char *p, *e, *r; + int plen, elen; + bool afterescape; - s = VARDATA(str); - slen = (VARSIZE(str)-VARHDRSZ); p = VARDATA(pat); plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); - - result = (MatchTextLower(s, slen, p, plen, e) == LIKE_TRUE); + e = VARDATA(esc); + elen = (VARSIZE(esc)-VARHDRSZ); - PG_RETURN_BOOL(result); -} - -Datum -itextnlike_escape(PG_FUNCTION_ARGS) -{ - bool result; - text *str = PG_GETARG_TEXT_P(0); - text *pat = PG_GETARG_TEXT_P(1); - text *esc = PG_GETARG_TEXT_P(2); - unsigned char *s, *p; - int slen, plen; - char *e; + /* + * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth + * trying to calculate the size more accurately than that. + */ + result = (text *) palloc(plen * 2 + VARHDRSZ); + r = VARDATA(result); - s = VARDATA(str); - slen = (VARSIZE(str)-VARHDRSZ); - p = VARDATA(pat); - plen = (VARSIZE(pat)-VARHDRSZ); - e = ((VARSIZE(esc)-VARHDRSZ) > 0? VARDATA(esc): NULL); + if (elen == 0) + { + /* + * No escape character is wanted. Double any backslashes in the + * pattern to make them act like ordinary characters. + */ + while (plen > 0) + { + if (*p == '\\') + *r++ = '\\'; + CopyAdvChar(r, p, plen); + } + } + else + { + /* + * The specified escape must be only a single character. + */ + NextChar(e, elen); + if (elen != 0) + elog(ERROR, "ESCAPE string must be empty or one character"); + e = VARDATA(esc); + /* + * If specified escape is '\', just copy the pattern as-is. + */ + if (*e == '\\') + { + memcpy(result, pat, VARSIZE(pat)); + PG_RETURN_TEXT_P(result); + } + /* + * Otherwise, convert occurrences of the specified escape character + * to '\', and double occurrences of '\' --- unless they immediately + * follow an escape character! + */ + afterescape = false; + while (plen > 0) + { + if (CHAREQ(p,e) && !afterescape) + { + *r++ = '\\'; + NextChar(p, plen); + afterescape = true; + } + else if (*p == '\\') + { + *r++ = '\\'; + if (! afterescape) + *r++ = '\\'; + NextChar(p, plen); + afterescape = false; + } + else + { + CopyAdvChar(r, p, plen); + afterescape = false; + } + } + } - result = (MatchTextLower(s, slen, p, plen, e) != LIKE_TRUE); + VARATT_SIZEP(result) = r - ((unsigned char *) result); - PG_RETURN_BOOL(result); + PG_RETURN_TEXT_P(result); } @@ -387,13 +385,14 @@ itextnlike_escape(PG_FUNCTION_ARGS) ** ** Keith Parks. <keith@mtcc.demon.co.uk> ** -** [SQL92 lets you specify the escape character by saying -** LIKE <pattern> ESCAPE <escape character>. We are a small operation -** so we force you to use '\'. - ay 7/95] +** SQL92 lets you specify the escape character by saying +** LIKE <pattern> ESCAPE <escape character>. We are a small operation +** so we force you to use '\'. - ay 7/95 +** +** Now we have the like_escape() function that converts patterns with +** any specified escape character (or none at all) to the internal +** default escape character, which is still '\'. - tgl 9/2000 ** -** OK, we now support the SQL9x LIKE <pattern> ESCAPE <char> syntax. -** We should kill the backslash escaping mechanism since it is non-standard -** and undocumented afaik. ** The code is rewritten to avoid requiring null-terminated strings, ** which in turn allows us to leave out some memcpy() operations. ** This code should be faster and take less memory, but no promises... @@ -401,6 +400,7 @@ itextnlike_escape(PG_FUNCTION_ARGS) ** */ + /*-------------------- * Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT. * @@ -413,93 +413,18 @@ itextnlike_escape(PG_FUNCTION_ARGS) *-------------------- */ -#ifdef MULTIBYTE -/*-------------------- - * Support routine for MatchText. Compares given multibyte streams - * as wide characters. If they match, returns 1 otherwise returns 0. - *-------------------- - */ -static int wchareq(unsigned char *p1, unsigned char *p2) -{ - int l; - - l = pg_mblen(p1); - if (pg_mblen(p2) != l) { - return(0); - } - while (l--) { - if (*p1++ != *p2++) - return(0); - } - return(1); -} - -/*-------------------- - * Support routine for MatchTextLower. Compares given multibyte streams - * as wide characters ignoring case. - * If they match, returns 1 otherwise returns 0. - *-------------------- - */ -#define UCHARMAX 0xff - -static int iwchareq(unsigned char *p1, unsigned char *p2) -{ - int c1, c2; - int l; - - /* short cut. if *p1 and *p2 is lower than UCHARMAX, then - we assume they are ASCII */ - if (*p1 < UCHARMAX && *p2 < UCHARMAX) - return(tolower(*p1) == tolower(*p2)); - - if (*p1 < UCHARMAX) - c1 = tolower(*p1); - else - { - l = pg_mblen(p1); - (void)pg_mb2wchar_with_len(p1, (pg_wchar *)&c1, l); - c1 = tolower(c1); - } - if (*p2 < UCHARMAX) - c2 = tolower(*p2); - else - { - l = pg_mblen(p2); - (void)pg_mb2wchar_with_len(p2, (pg_wchar *)&c2, l); - c2 = tolower(c2); - } - return(c1 == c2); -} -#endif - -#ifdef MULTIBYTE -#define CHAREQ(p1, p2) wchareq(p1, p2) -#define ICHAREQ(p1, p2) iwchareq(p1, p2) -#define NextChar(p, plen) {int __l = pg_mblen(p); (p) +=__l; (plen) -=__l;} -#else -#define CHAREQ(p1, p2) (*(p1) == *(p2)) -#define ICHAREQ(p1, p2) (tolower(*(p1)) == tolower(*(p2))) -#define NextChar(p, plen) (p)++, (plen)-- -#endif - static int -MatchText(unsigned char * t, int tlen, unsigned char * p, int plen, char *e) +MatchText(unsigned char * t, int tlen, unsigned char * p, int plen) { - /* Fast path for match-everything pattern - * Include weird case of escape character as a percent sign or underscore, - * when presumably that wildcard character becomes a literal. - */ - if ((plen == 1) && (*p == '%') - && ! ((e != NULL) && (*e == '%'))) + /* Fast path for match-everything pattern */ + if ((plen == 1) && (*p == '%')) return LIKE_TRUE; while ((tlen > 0) && (plen > 0)) { - /* If an escape character was specified and we find it here in the pattern, - * then we'd better have an exact match for the next character. - */ - if ((e != NULL) && CHAREQ(p,e)) + if (*p == '\\') { + /* Next pattern char must match literally, whatever it is */ NextChar(p, plen); if ((plen <= 0) || !CHAREQ(t,p)) return LIKE_FALSE; @@ -525,10 +450,9 @@ MatchText(unsigned char * t, int tlen, unsigned char * p, int plen, char *e) * recurse unless first pattern char might match this * text char. */ - if (CHAREQ(t,p) || (*p == '_') - || ((e != NULL) && CHAREQ(p,e))) + if (CHAREQ(t,p) || (*p == '\\') || (*p == '_')) { - int matched = MatchText(t, tlen, p, plen, e); + int matched = MatchText(t, tlen, p, plen); if (matched != LIKE_FALSE) return matched; /* TRUE or ABORT */ @@ -571,24 +495,21 @@ MatchText(unsigned char * t, int tlen, unsigned char * p, int plen, char *e) return LIKE_ABORT; } /* MatchText() */ +/* + * Same as above, but ignore case + */ static int -MatchTextLower(unsigned char * t, int tlen, unsigned char * p, int plen, char *e) +MatchTextIC(unsigned char * t, int tlen, unsigned char * p, int plen) { - /* Fast path for match-everything pattern - * Include weird case of escape character as a percent sign or underscore, - * when presumably that wildcard character becomes a literal. - */ - if ((plen == 1) && (*p == '%') - && ! ((e != NULL) && (*e == '%'))) + /* Fast path for match-everything pattern */ + if ((plen == 1) && (*p == '%')) return LIKE_TRUE; while ((tlen > 0) && (plen > 0)) { - /* If an escape character was specified and we find it here in the pattern, - * then we'd better have an exact match for the next character. - */ - if ((e != NULL) && ICHAREQ(p,e)) + if (*p == '\\') { + /* Next pattern char must match literally, whatever it is */ NextChar(p, plen); if ((plen <= 0) || !ICHAREQ(t,p)) return LIKE_FALSE; @@ -614,10 +535,9 @@ MatchTextLower(unsigned char * t, int tlen, unsigned char * p, int plen, char *e * recurse unless first pattern char might match this * text char. */ - if (ICHAREQ(t,p) || (*p == '_') - || ((e != NULL) && ICHAREQ(p,e))) + if (ICHAREQ(t,p) || (*p == '\\') || (*p == '_')) { - int matched = MatchText(t, tlen, p, plen, e); + int matched = MatchTextIC(t, tlen, p, plen); if (matched != LIKE_FALSE) return matched; /* TRUE or ABORT */ @@ -634,6 +554,9 @@ MatchTextLower(unsigned char * t, int tlen, unsigned char * p, int plen, char *e } else if ((*p != '_') && !ICHAREQ(t,p)) { + /* Not the single-character wildcard and no explicit match? + * Then time to quit... + */ return LIKE_FALSE; } @@ -655,4 +578,4 @@ MatchTextLower(unsigned char * t, int tlen, unsigned char * p, int plen, char *e * start matching this pattern. */ return LIKE_ABORT; -} /* MatchTextLower() */ +} /* MatchTextIC() */ |