aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2013-07-31 11:31:33 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2013-07-31 11:31:33 -0400
commit1346f40b584d8072bdacdf312027c0ef57e4b7c2 (patch)
treee97890df7e1ed57b315c755983da442f7e794077 /src
parented33ad39a2f353acc3d38591a749f324de3b6c23 (diff)
downloadpostgresql-1346f40b584d8072bdacdf312027c0ef57e4b7c2.tar.gz
postgresql-1346f40b584d8072bdacdf312027c0ef57e4b7c2.zip
Fix regexp_matches() handling of zero-length matches.
We'd find the same match twice if it was of zero length and not immediately adjacent to the previous match. replace_text_regexp() got similar cases right, so adjust this search logic to match that. Note that even though the regexp_split_to_xxx() functions share this code, they did not display equivalent misbehavior, because the second match would be considered degenerate and ignored. Jeevan Chalke, with some cosmetic changes by me.
Diffstat (limited to 'src')
-rw-r--r--src/backend/utils/adt/regexp.c13
-rw-r--r--src/backend/utils/adt/varlena.c5
-rw-r--r--src/test/regress/expected/strings.out58
-rw-r--r--src/test/regress/sql/strings.sql7
4 files changed, 75 insertions, 8 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 67e205d0816..b15b0905b74 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -957,14 +957,13 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
break;
/*
- * Advance search position. Normally we start just after the end of
- * the previous match, but always advance at least one character (the
- * special case can occur if the pattern matches zero characters just
- * after the prior match or at the end of the string).
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
*/
- if (start_search < pmatch[0].rm_eo)
- start_search = pmatch[0].rm_eo;
- else
+ start_search = prev_match_end;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
start_search++;
if (start_search > wide_len)
break;
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3e2243958a6..fe06f1b5527 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -2902,7 +2902,10 @@ replace_text_regexp(text *src_text, void *regexp,
break;
/*
- * Search from next character when the matching text is zero width.
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
*/
search_start = data_pos;
if (pmatch[0].rm_so == pmatch[0].rm_eo)
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 1bd6772ddd4..34fa4b40976 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -440,6 +440,64 @@ SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
{barbeque}
(1 row)
+-- start/end-of-line matches are of zero length
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {""}
+ {""}
+ {""}
+(4 rows)
+
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {""}
+ {""}
+ {""}
+(4 rows)
+
+SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
+ regexp_matches
+----------------
+ {1}
+ {2}
+ {3}
+ {4}
+ {""}
+(5 rows)
+
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {1}
+ {""}
+ {2}
+ {""}
+ {3}
+ {""}
+ {4}
+ {""}
+ {""}
+(10 rows)
+
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {1}
+ {""}
+ {2}
+ {""}
+ {3}
+ {""}
+ {4}
+ {""}
+(9 rows)
+
-- give me errors
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
ERROR: invalid regexp option: "z"
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 6ef446308b5..198b85c8df9 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -158,6 +158,13 @@ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
-- no capture groups
SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+-- start/end-of-line matches are of zero length
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
+SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
+
-- give me errors
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'gz');
SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);