aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andrew Gierth <rhodiumtoad@postgresql.org> 2018-09-12 19:31:06 +0100
committer: Andrew Gierth <rhodiumtoad@postgresql.org> 2018-09-12 19:44:28 +0100
commit: ab78c6e36635da96c5ef8c229942a16b5391c0b9 (patch)
tree: 4642ff6091df246b7bd3813fb0d225d25b0992a9
parent: 6592d89068b694af6894117e629ab45c1e1f49b2 (diff)
download: postgresql-ab78c6e36635da96c5ef8c229942a16b5391c0b9.tar.gz
          postgresql-ab78c6e36635da96c5ef8c229942a16b5391c0b9.zip
Repair bug in regexp split performance improvements.
Commit c8ea87e4b introduced a temporary conversion buffer for substrings extracted during regexp splits. Unfortunately, the code that sized the buffer did not account for degenerate (zero-length) regexp matches that the split functions ignore: it measured each unmatched substring from the end of the previous match even when that match had been ignored, so for regexp_split_* calls it could under-size the buffer in such cases. Fix this by measuring from the end of the previous valid match, and add some regression test cases (though those will only catch the bug if run in a multibyte encoding). Backpatch to 9.3, as the faulty code was also backpatched that far. Thanks to the PostGIS project, Regina Obe and Paul Ramsey for the report (via IRC) and assistance in analysis. Patch by me.
-rw-r--r-- src/backend/utils/adt/regexp.c 16
-rw-r--r-- src/test/regress/expected/strings.out 18
-rw-r--r-- src/test/regress/sql/strings.sql 3
3 files changed, 31 insertions, 6 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 3b7adfb0479..92525c2c4de 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -982,6 +982,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
int array_len;
int array_idx;
int prev_match_end;
+ int prev_valid_match_end;
int start_search;
int maxlen = 0; /* largest fetch length in characters */
@@ -1024,6 +1025,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
/* search for the pattern, perhaps repeatedly */
prev_match_end = 0;
+ prev_valid_match_end = 0;
start_search = 0;
while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
pmatch_len, pmatch))
@@ -1076,13 +1078,15 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
matchctx->nmatches++;
/*
- * check length of unmatched portion between end of previous match
- * and start of current one
+ * check length of unmatched portion between end of previous valid
+ * (nondegenerate, or degenerate but not ignored) match and start
+ * of current one
*/
if (fetching_unmatched &&
pmatch[0].rm_so >= 0 &&
- (pmatch[0].rm_so - prev_match_end) > maxlen)
- maxlen = (pmatch[0].rm_so - prev_match_end);
+ (pmatch[0].rm_so - prev_valid_match_end) > maxlen)
+ maxlen = (pmatch[0].rm_so - prev_valid_match_end);
+ prev_valid_match_end = pmatch[0].rm_eo;
}
prev_match_end = pmatch[0].rm_eo;
@@ -1108,8 +1112,8 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
* input string
*/
if (fetching_unmatched &&
- (wide_len - prev_match_end) > maxlen)
- maxlen = (wide_len - prev_match_end);
+ (wide_len - prev_valid_match_end) > maxlen)
+ maxlen = (wide_len - prev_valid_match_end);
/*
* Keep a note of the end position of the string for the benefit of
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 35cadb24aa1..3b1a7d80b82 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -674,6 +674,24 @@ SELECT regexp_split_to_array('123456','.');
{"","","","","","",""}
(1 row)
+SELECT regexp_split_to_array('123456','');
+ regexp_split_to_array
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('123456','(?:)');
+ regexp_split_to_array
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('1','');
+ regexp_split_to_array
+-----------------------
+ {1}
+(1 row)
+
-- errors
SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
ERROR: invalid regexp option: "z"
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index f9cfaeb44ac..5e39458bd22 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -188,6 +188,9 @@ SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 'nom
SELECT regexp_split_to_array('123456','1');
SELECT regexp_split_to_array('123456','6');
SELECT regexp_split_to_array('123456','.');
+SELECT regexp_split_to_array('123456','');
+SELECT regexp_split_to_array('123456','(?:)');
+SELECT regexp_split_to_array('1','');
-- errors
SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');