diff options
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- | src/backend/utils/adt/regexp.c | 73 |
1 files changed, 65 insertions, 8 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index da13a875eb0..ab44846e0b3 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -708,20 +708,42 @@ similar_escape(PG_FUNCTION_ARGS) * We surround the transformed input string with * ^(?: ... )$ * which requires some explanation. We need "^" and "$" to force - * the pattern to match the entire input string as per SQL99 spec. + * the pattern to match the entire input string as per the SQL spec. * The "(?:" and ")" are a non-capturing set of parens; we have to have * parens in case the string contains "|", else the "^" and "$" will * be bound into the first and last alternatives which is not what we * want, and the parens must be non capturing because we don't want them * to count when selecting output for SUBSTRING. + * + * When the pattern is divided into three parts by escape-double-quotes, + * what we emit is + * ^(?:part1){1,1}?(part2){1,1}(?:part3)$ + * which requires even more explanation. The "{1,1}?" on part1 makes it + * non-greedy so that it will match the smallest possible amount of text + * not the largest, as required by SQL. The plain parens around part2 + * are capturing parens so that that part is what controls the result of + * SUBSTRING. The "{1,1}" forces part2 to be greedy, so that it matches + * the largest possible amount of text; hence part3 must match the + * smallest amount of text, as required by SQL. We don't need an explicit + * greediness marker on part3. Note that this also confines the effects + * of any "|" characters to the respective part, which is what we want. + * + * The SQL spec says that SUBSTRING's pattern must contain exactly two + * escape-double-quotes, but we only complain if there's more than two. + * With none, we act as though part1 and part3 are empty; with one, we + * act as though part3 is empty. Both behaviors fall out of omitting + * the relevant part separators in the above expansion. If the result + * of this function is used in a plain regexp match (SIMILAR TO), the + * escape-double-quotes have no effect on the match behavior. *---------- */ /* - * We need room for the prefix/postfix plus as many as 3 output bytes per - * input byte; since the input is at most 1GB this can't overflow + * We need room for the prefix/postfix and part separators, plus as many + * as 3 output bytes per input byte; since the input is at most 1GB this + * can't overflow size_t. */ - result = (text *) palloc(VARHDRSZ + 6 + 3 * plen); + result = (text *) palloc(VARHDRSZ + 23 + 3 * (size_t) plen); r = VARDATA(result); *r++ = '^'; @@ -760,7 +782,7 @@ similar_escape(PG_FUNCTION_ARGS) } else if (e && elen == mblen && memcmp(e, p, mblen) == 0) { - /* SQL99 escape character; do not send to output */ + /* SQL escape character; do not send to output */ afterescape = true; } else @@ -784,10 +806,45 @@ similar_escape(PG_FUNCTION_ARGS) /* fast path */ if (afterescape) { - if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */ - *r++ = ((nquotes++ % 2) == 0) ? '(' : ')'; + if (pchar == '"' && !incharclass) /* escape-double-quote? */ + { + /* emit appropriate part separator, per notes above */ + if (nquotes == 0) + { + *r++ = ')'; + *r++ = '{'; + *r++ = '1'; + *r++ = ','; + *r++ = '1'; + *r++ = '}'; + *r++ = '?'; + *r++ = '('; + } + else if (nquotes == 1) + { + *r++ = ')'; + *r++ = '{'; + *r++ = '1'; + *r++ = ','; + *r++ = '1'; + *r++ = '}'; + *r++ = '('; + *r++ = '?'; + *r++ = ':'; + } + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), + errmsg("SQL regular expression may not contain more than two escape-double-quote separators"))); + nquotes++; + } else { + /* + * We allow any character at all to be escaped; notably, this + * allows access to POSIX character-class escapes such as + * "\d". The SQL spec is considerably more restrictive. + */ *r++ = '\\'; *r++ = pchar; } @@ -795,7 +852,7 @@ similar_escape(PG_FUNCTION_ARGS) } else if (e && pchar == *e) { - /* SQL99 escape character; do not send to output */ + /* SQL escape character; do not send to output */ afterescape = true; } else if (incharclass) |