diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2019-09-07 14:21:59 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2019-09-07 14:21:59 -0400 |
commit | ca70bdaefea5188066b3c2a6eaaaa1cb8cb8ce06 (patch) | |
tree | 256506db68ed8fb1dce26496d7cebfbdfecc4f1c /src/backend/utils/adt/regexp.c | |
parent | c5bc7050aff1d1bba6532377fe37b351581661a8 (diff) | |
download | postgresql-ca70bdaefea5188066b3c2a6eaaaa1cb8cb8ce06.tar.gz postgresql-ca70bdaefea5188066b3c2a6eaaaa1cb8cb8ce06.zip |
Fix issues around strictness of SIMILAR TO.
As a result of some long-ago quick hacks, the SIMILAR TO operator
and the corresponding flavor of substring() interpreted "ESCAPE NULL"
as selecting the default escape character '\'. This is both
surprising and not per spec: the standard is clear that these
functions should return NULL for NULL input.
Additionally, because of inconsistency of the strictness markings
of 3-argument substring() and similar_escape(), the planner could not
inline the SQL definition of substring(), resulting in a substantial
performance penalty compared to the underlying POSIX substring()
function.
The simplest fix for this would be to change the strictness marking
of similar_escape(), but if we do that we risk breaking existing views
that depend on that function. Hence, leave similar_escape() as-is
as a compatibility function, and instead invent a new function
similar_to_escape() that comes in two strict variants.
There are a couple of other behaviors in this area that are also
not per spec, but they are documented and seem generally at least
as sane as the spec's definition, so leave them alone. But improve
the documentation to describe them fully.
Patch by me; thanks to Álvaro Herrera and Andrew Gierth for review
and discussion.
Discussion: https://postgr.es/m/14047.1557708214@sss.pgh.pa.us
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- | src/backend/utils/adt/regexp.c | 85 |
1 files changed, 71 insertions, 14 deletions
```diff
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 90a9197792e..3d38aef820c 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -654,15 +654,18 @@ textregexreplace(PG_FUNCTION_ARGS)
 }
 
 /*
- * similar_escape()
- * Convert a SQL:2008 regexp pattern to POSIX style, so it can be used by
- * our regexp engine.
+ * similar_to_escape(), similar_escape()
+ *
+ * Convert a SQL "SIMILAR TO" regexp pattern to POSIX style, so it can be
+ * used by our regexp engine.
+ *
+ * similar_escape_internal() is the common workhorse for three SQL-exposed
+ * functions. esc_text can be passed as NULL to select the default escape
+ * (which is '\'), or as an empty string to select no escape character.
  */
-Datum
-similar_escape(PG_FUNCTION_ARGS)
+static text *
+similar_escape_internal(text *pat_text, text *esc_text)
 {
-	text *pat_text;
-	text *esc_text;
 	text *result;
 	char *p,
 		*e,
@@ -673,13 +676,9 @@ similar_escape(PG_FUNCTION_ARGS)
 	bool incharclass = false;
 	int nquotes = 0;
 
-	/* This function is not strict, so must test explicitly */
-	if (PG_ARGISNULL(0))
-		PG_RETURN_NULL();
-	pat_text = PG_GETARG_TEXT_PP(0);
 	p = VARDATA_ANY(pat_text);
 	plen = VARSIZE_ANY_EXHDR(pat_text);
-	if (PG_ARGISNULL(1))
+	if (esc_text == NULL)
 	{
 		/* No ESCAPE clause provided; default to backslash as escape */
 		e = "\\";
@@ -687,12 +686,11 @@ similar_escape(PG_FUNCTION_ARGS)
 	}
 	else
 	{
-		esc_text = PG_GETARG_TEXT_PP(1);
 		e = VARDATA_ANY(esc_text);
 		elen = VARSIZE_ANY_EXHDR(esc_text);
 		if (elen == 0)
 			e = NULL; /* no escape character */
-		else
+		else if (elen > 1)
 		{
 			int escape_mblen = pg_mbstrlen_with_len(e, elen);
 
@@ -898,6 +896,65 @@ similar_escape(PG_FUNCTION_ARGS)
 
 	SET_VARSIZE(result, r - ((char *) result));
 
+	return result;
+}
+
+/*
+ * similar_to_escape(pattern, escape)
+ */
+Datum
+similar_to_escape_2(PG_FUNCTION_ARGS)
+{
+	text *pat_text = PG_GETARG_TEXT_PP(0);
+	text *esc_text = PG_GETARG_TEXT_PP(1);
+	text *result;
+
+	result = similar_escape_internal(pat_text, esc_text);
+
+	PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * similar_to_escape(pattern)
+ * Inserts a default escape character.
+ */
+Datum
+similar_to_escape_1(PG_FUNCTION_ARGS)
+{
+	text *pat_text = PG_GETARG_TEXT_PP(0);
+	text *result;
+
+	result = similar_escape_internal(pat_text, NULL);
+
+	PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * similar_escape(pattern, escape)
+ *
+ * Legacy function for compatibility with views stored using the
+ * pre-v13 expansion of SIMILAR TO. Unlike the above functions, this
+ * is non-strict, which leads to not-per-spec handling of "ESCAPE NULL".
+ */
+Datum
+similar_escape(PG_FUNCTION_ARGS)
+{
+	text *pat_text;
+	text *esc_text;
+	text *result;
+
+	/* This function is not strict, so must test explicitly */
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pat_text = PG_GETARG_TEXT_PP(0);
+
+	if (PG_ARGISNULL(1))
+		esc_text = NULL; /* use default escape character */
+	else
+		esc_text = PG_GETARG_TEXT_PP(1);
+
+	result = similar_escape_internal(pat_text, esc_text);
+
 	PG_RETURN_TEXT_P(result);
 }
 
```