diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2004-12-02 02:45:24 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2004-12-02 02:45:24 +0000 |
commit | 6ba32b24b28d10b24af92d1e8312cac4cef5ddb6 (patch) | |
tree | 03cb9ea2567788c2997e7e81c1c1d339b178e64e /src/backend/utils/adt | |
parent | d237a12e493f6e1b562579bf8361040ac1ba2470 (diff) | |
download | postgresql-6ba32b24b28d10b24af92d1e8312cac4cef5ddb6.tar.gz postgresql-6ba32b24b28d10b24af92d1e8312cac4cef5ddb6.zip |
Teach regex_fixed_prefix() the correct handling of advanced regex
escapes --- they aren't simply quoted characters. Problem noted by
Antti Salmela. Also fix problem with incorrect handling of multibyte
characters when followed by a quantifier.
Diffstat (limited to 'src/backend/utils/adt')
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 34 |
1 files changed, 26 insertions, 8 deletions
```diff
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 34e4d767962..680a8d53fd6 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.147.2.3 2004/02/27 21:44:44 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.147.2.4 2004/12/02 02:45:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3218,6 +3218,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 	char *match;
 	int pos,
 		match_pos,
+		prev_pos,
+		prev_match_pos,
 		paren_depth;
 	char *patt;
 	char *rest;
@@ -3278,11 +3280,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 
 	/* OK, allocate space for pattern */
 	match = palloc(strlen(patt) + 1);
-	match_pos = 0;
+	prev_match_pos = match_pos = 0;
 
 	/* note start at pos 1 to skip leading ^ */
-	for (pos = 1; patt[pos]; pos++)
+	for (prev_pos = pos = 1; patt[pos]; )
 	{
+		int len;
+
 		/*
 		 * Check for characters that indicate multiple possible matches
 		 * here. XXX I suspect isalpha() is not an adequately
@@ -3297,6 +3301,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 			break;
 
 		/*
+		 * In AREs, backslash followed by alphanumeric is an escape, not
+		 * a quoted character. Must treat it as having multiple possible
+		 * matches.
+		 */
+		if (patt[pos] == '\\' && isalnum((unsigned char) patt[pos + 1]))
+			break;
+
+		/*
 		 * Check for quantifiers. Except for +, this means the preceding
 		 * character is optional, so we must remove it from the prefix
 		 * too!
@@ -3305,14 +3317,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 			patt[pos] == '?' ||
 			patt[pos] == '{')
 		{
-			if (match_pos > 0)
-				match_pos--;
-			pos--;
+			match_pos = prev_match_pos;
+			pos = prev_pos;
 			break;
 		}
 		if (patt[pos] == '+')
 		{
-			pos--;
+			pos = prev_pos;
 			break;
 		}
 		if (patt[pos] == '\\')
@@ -3322,7 +3333,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 			if (patt[pos] == '\0')
 				break;
 		}
-		match[match_pos++] = patt[pos];
+		/* save position in case we need to back up on next loop cycle */
+		prev_match_pos = match_pos;
+		prev_pos = pos;
+		/* must use encoding-aware processing here */
+		len = pg_mblen(&patt[pos]);
+		memcpy(&match[match_pos], &patt[pos], len);
+		match_pos += len;
+		pos += len;
 	}
 
 	match[match_pos] = '\0';
```