diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2021-08-23 17:41:07 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2021-08-23 17:41:07 -0400 |
commit | 779557bd22895420b48eba409d2286f1dea08c06 (patch) | |
tree | 399163a9823fd42a39758df315202c78bef06f7e /src/test | |
parent | e3fb6170e58c4325cd1e1eb22f96ef43c3b4152a (diff) | |
download | postgresql-779557bd22895420b48eba409d2286f1dea08c06.tar.gz postgresql-779557bd22895420b48eba409d2286f1dea08c06.zip |
Prevent regexp back-refs from sometimes matching when they shouldn't.
The recursion in cdissect() was careless about clearing match data
for capturing parentheses after rejecting a partial match. This
could allow a later back-reference to succeed when by rights it
should fail for lack of a defined referent.
To fix, think a little more rigorously about what the contract
between different levels of cdissect's recursion needs to be.
With the right spec, we can fix this using fewer rather than more
resets of the match data; the key decision being that a failed
sub-match is now explicitly responsible for clearing any matches
it may have set.
There are enough other cross-checks and optimizations in the code
that it's not especially easy to exhibit this problem; usually, the
match will fail as expected. Plus, regexps that are even potentially
vulnerable are most likely user errors, since there's just not much
point in writing a back-ref that doesn't always have a referent.
These facts perhaps explain why the issue hasn't been detected,
even though it's almost certainly a couple of decades old.
Discussion: https://postgr.es/m/151435.1629733387@sss.pgh.pa.us
Diffstat (limited to 'src/test')
-rw-r--r-- | src/test/modules/test_regex/expected/test_regex.out | 14 | ||||
-rw-r--r-- | src/test/modules/test_regex/sql/test_regex.sql | 4 | ||||
-rw-r--r-- | src/test/regress/expected/regex.out | 13 | ||||
-rw-r--r-- | src/test/regress/sql/regex.sql | 4 |
4 files changed, 35 insertions, 0 deletions
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 44da7d20190..83fb9a8b9ca 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -2636,6 +2636,20 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP');
  {2,REG_UBACKREF,REG_UNONPOSIX}
 (1 row)
 
+-- expectNomatch 14.30 RP {^(.)\1|\1.} {abcdef}
+select * from test_regex('^(.)\1|\1.', 'abcdef', 'RP');
+ test_regex
+--------------------------------
+ {1,REG_UBACKREF,REG_UNONPOSIX}
+(1 row)
+
+-- expectNomatch 14.31 RP {^((.)\2|..)\2} {abadef}
+select * from test_regex('^((.)\2|..)\2', 'abadef', 'RP');
+ test_regex
+--------------------------------
+ {2,REG_UBACKREF,REG_UNONPOSIX}
+(1 row)
+
 -- back reference only matches the string, not any constraints
 select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP');
  test_regex
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 9224fdfdd3a..991f804cb67 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -769,6 +769,10 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abc', 'RP');
 select * from test_regex('^(.+)( \1)+$', 'abc abd abc', 'RP');
 -- expectNomatch 14.29 RP {^(.+)( \1)+$} {abc abc abd}
 select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP');
+-- expectNomatch 14.30 RP {^(.)\1|\1.} {abcdef}
+select * from test_regex('^(.)\1|\1.', 'abcdef', 'RP');
+-- expectNomatch 14.31 RP {^((.)\2|..)\2} {abadef}
+select * from test_regex('^((.)\2|..)\2', 'abadef', 'RP');
 
 -- back reference only matches the string, not any constraints
 select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP');
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index 86477cc506c..cbe2cfc3ea1 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -567,6 +567,19 @@ select 'a' ~ '()+\1';
  t
 (1 row)
 
+-- Test ancient oversight in when to apply zaptreesubs
+select 'abcdef' ~ '^(.)\1|\1.' as f;
+ f
+---
+ f
+(1 row)
+
+select 'abadef' ~ '^((.)\2|..)\2' as f;
+ f
+---
+ f
+(1 row)
+
 -- Add coverage for some cases in checkmatchall
 select regexp_match('xy', '.|...');
  regexp_match
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
index b03a8d9ac22..c6974a43d11 100644
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -135,6 +135,10 @@ select 'a' ~ '.. ()|\1';
 select 'a' ~ '()*\1';
 select 'a' ~ '()+\1';
 
+-- Test ancient oversight in when to apply zaptreesubs
+select 'abcdef' ~ '^(.)\1|\1.' as f;
+select 'abadef' ~ '^((.)\2|..)\2' as f;
+
 -- Add coverage for some cases in checkmatchall
 select regexp_match('xy', '.|...');
 select regexp_match('xyz', '.|...');