aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeil Conway <neilc@samurai.com>2007-03-20 05:45:00 +0000
committerNeil Conway <neilc@samurai.com>2007-03-20 05:45:00 +0000
commit9eb78beeae01f2f0ccafc5d66a2003ea7e3952f9 (patch)
tree8e4139ea736ab91319fff21b8654ef4d9150ac55
parent5e96b04a7cb1f35aa1b75680f8bfbeac7cedc178 (diff)
downloadpostgresql-9eb78beeae01f2f0ccafc5d66a2003ea7e3952f9.tar.gz
postgresql-9eb78beeae01f2f0ccafc5d66a2003ea7e3952f9.zip
Add three new regexp functions: regexp_matches, regexp_split_to_array,
and regexp_split_to_table. These functions provide access to the capture groups resulting from a POSIX regular expression match, and provide the ability to split a string on a POSIX regular expression, respectively. Patch from Jeremy Drake; code review by Neil Conway, additional comments and suggestions from Tom and Peter E. This patch bumps the catversion, adds some regression tests, and updates the docs.
-rw-r--r--doc/src/sgml/func.sgml184
-rw-r--r--src/backend/utils/adt/regexp.c581
-rw-r--r--src/include/catalog/catversion.h4
-rw-r--r--src/include/catalog/pg_proc.h14
-rw-r--r--src/include/utils/builtins.h8
-rw-r--r--src/test/regress/expected/strings.out227
-rw-r--r--src/test/regress/sql/strings.sql53
7 files changed, 1001 insertions, 70 deletions
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b8be507f2df..084db0d40e5 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.369 2007/02/20 19:59:04 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.370 2007/03/20 05:44:59 neilc Exp $ -->
<chapter id="functions">
<title>Functions and Operators</title>
@@ -1468,18 +1468,53 @@
</row>
<row>
- <entry><literal><function>regexp_replace</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [,<parameter>flags</parameter> <type>text</type>])</literal></entry>
+ <entry><literal><function>regexp_matches</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry>
+ <entry><type>setof text[]</type></entry>
+ <entry>
+ Return all capture groups resulting from matching POSIX regular
+ expression against the <parameter>string</parameter>. See
+ <xref linkend="functions-posix-regexp"> for more information.
+ </entry>
+ <entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry>
+ <entry><literal>{bar,beque}</literal></entry>
+ </row>
+
+ <row>
+ <entry><literal><function>regexp_replace</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry>
<entry><type>text</type></entry>
<entry>
Replace substring matching POSIX regular expression. See
- <xref linkend="functions-matching"> for more information on pattern
- matching.
+ <xref linkend="functions-posix-regexp"> for more information.
</entry>
<entry><literal>regexp_replace('Thomas', '.[mN]a.', 'M')</literal></entry>
<entry><literal>ThM</literal></entry>
</row>
<row>
+ <entry><literal><function>regexp_split_to_array</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type> ])</literal></entry>
+ <entry><type>text[]</type></entry>
+ <entry>
+ Split <parameter>string</parameter> using POSIX regular expression as
+ the delimiter. See <xref linkend="functions-posix-regexp"> for more
+ information.
+ </entry>
+ <entry><literal>regexp_split_to_array('hello world', E'\\s+')</literal></entry>
+ <entry><literal>{hello,world}</literal></entry>
+ </row>
+
+ <row>
+ <entry><literal><function>regexp_split_to_table</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry>
+ <entry><type>setof text</type></entry>
+ <entry>
+ Split <parameter>string</parameter> using POSIX regular expression as
+ the delimiter. See <xref linkend="functions-posix-regexp"> for more
+ information.
+ </entry>
+ <entry><literal>regexp_split_to_table('hello world', E'\\s+')</literal></entry>
+ <entry><literal>hello</literal><para><literal>world</literal></para> (2 rows)</entry>
+ </row>
+
+ <row>
<entry><literal><function>repeat</function>(<parameter>string</parameter> <type>text</type>, <parameter>number</parameter> <type>int</type>)</literal></entry>
<entry><type>text</type></entry>
<entry>Repeat <parameter>string</parameter> the specified
@@ -2883,9 +2918,6 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
<indexterm>
<primary>substring</primary>
</indexterm>
- <indexterm>
- <primary>regexp_replace</primary>
- </indexterm>
<synopsis>
<replaceable>string</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional>
@@ -3004,6 +3036,21 @@ substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotat
<primary>regular expression</primary>
<seealso>pattern matching</seealso>
</indexterm>
+ <indexterm>
+ <primary>substring</primary>
+ </indexterm>
+ <indexterm>
+ <primary>regexp_replace</primary>
+ </indexterm>
+ <indexterm>
+ <primary>regexp_matches</primary>
+ </indexterm>
+ <indexterm>
+ <primary>regexp_split_to_table</primary>
+ </indexterm>
+ <indexterm>
+ <primary>regexp_split_to_array</primary>
+ </indexterm>
<para>
<xref linkend="functions-posix-table"> lists the available
@@ -3134,7 +3181,10 @@ substring('foobar' from 'o(.)b') <lineannotation>o</lineannotation>
string containing zero or more single-letter flags that change the
function's behavior. Flag <literal>i</> specifies case-insensitive
matching, while flag <literal>g</> specifies replacement of each matching
- substring rather than only the first one.
+ substring rather than only the first one. Other supported flags are
+ <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
+ <literal>x</>, whose meanings correspond to those shown in
+ <xref linkend="posix-embedded-options-table">.
</para>
<para>
@@ -3149,6 +3199,124 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g')
</programlisting>
</para>
+ <para>
+ The <function>regexp_matches</> function returns all of the capture
+ groups resulting from matching a POSIX regular expression pattern.
+ It has the syntax
+ <function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</>
+ <optional>, <replaceable>flags</> </optional>).
+ If there is no match to the <replaceable>pattern</>, the function returns no rows.
+ If there is a match, the function returns the contents of all of the capture groups
+ in a text array, or if there were no capture groups in the pattern, it returns the
+ contents of the entire match as a single-element text array.
+ The <replaceable>flags</> parameter is an optional text
+ string containing zero or more single-letter flags that change the
+ function's behavior. Flag <literal>i</> specifies case-insensitive
+ matching, while flag <literal>g</> causes the return of each matching
+ substring rather than only the first one. Other supported
+ flags are <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
+ <literal>x</>, whose meanings are described in
+ <xref linkend="posix-embedded-options-table">.
+ </para>
+
+ <para>
+ Some examples:
+<programlisting>
+SELECT regexp_matches('foobarbequebaz', '(bar)(beque)');
+ regexp_matches
+----------------
+ {bar,beque}
+(1 row)
+
+SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g');
+ regexp_matches
+----------------
+ {bar,beque}
+ {bazil,barf}
+(2 rows)
+
+SELECT regexp_matches('foobarbequebaz', 'barbeque');
+ regexp_matches
+----------------
+ {barbeque}
+(1 row)
+</programlisting>
+ </para>
+
+ <para>
+ The <function>regexp_split_to_table</> function splits a string using a POSIX
+ regular expression pattern as a delimiter. It has the syntax
+ <function>regexp_split_to_table</function>(<replaceable>string</>, <replaceable>pattern</>
+ <optional>, <replaceable>flags</> </optional>).
+ If there is no match to the <replaceable>pattern</>, the function returns the
+ <replaceable>string</>. If there is at least one match, for each match it returns
+ the text from the end of the last match (or the beginning of the string)
+ to the beginning of the match. When there are no more matches, it
+ returns the text from the end of the last match to the end of the string.
+ The <replaceable>flags</> parameter is an optional text string containing
+ zero or more single-letter flags that change the function's behavior.
+ <function>regexp_split_to_table</function> supports the flags <literal>i</>,
+ <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
+ <literal>x</>, whose meanings are described in
+ <xref linkend="posix-embedded-options-table">.
+ </para>
+
+ <para>
+ The <function>regexp_split_to_array</> function behaves the same as
+ <function>regexp_split_to_table</>, except that <function>regexp_split_to_array</>
+ returns its results as a <type>text[]</>. It has the syntax
+ <function>regexp_split_to_array</function>(<replaceable>string</>, <replaceable>pattern</>
+ <optional>, <replaceable>flags</> </optional>).
+ The parameters are the same as for <function>regexp_split_to_table</>.
+ </para>
+
+ <para>
+ Some examples:
+<programlisting>
+
+SELECT foo FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', E'\\\s+') AS foo;
+ foo
+--------
+ the
+ quick
+ brown
+ fox
+ jumped
+ over
+ the
+ lazy
+ dog
+(9 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', E'\\s+');
+ regexp_split_to_array
+------------------------------------------------
+ {the,quick,brown,fox,jumped,over,the,lazy,dog}
+(1 row)
+
+SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo;
+ foo
+-----
+ t
+ h
+ e
+ q
+ u
+ i
+ c
+ k
+ b
+ r
+ o
+ w
+ n
+ f
+ o
+ x
+(16 rows)
+</programlisting>
+ </para>
+
<para>
<productname>PostgreSQL</productname>'s regular expressions are implemented
using a package written by Henry Spencer. Much of
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 6a7a5fcc13b..9f8dd0323a9 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.69 2007/02/27 23:48:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.70 2007/03/20 05:44:59 neilc Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@@ -29,9 +29,12 @@
*/
#include "postgres.h"
+#include "access/heapam.h"
+#include "funcapi.h"
#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/guc.h"
+#include "utils/lsyscache.h"
/* GUC-settable flavor parameter */
@@ -75,9 +78,56 @@ typedef struct cached_re_str
regex_t cre_re; /* the compiled regular expression */
} cached_re_str;
+typedef struct re_comp_flags
+{
+ int cflags;
+ bool glob;
+} re_comp_flags;
+
+typedef struct regexp_matches_ctx
+{
+ text *orig_str;
+ size_t orig_len;
+ pg_wchar *wide_str;
+ size_t wide_len;
+ regex_t *cpattern;
+ regmatch_t *pmatch;
+ size_t offset;
+
+ re_comp_flags flags;
+
+ /* text type info */
+ Oid param_type;
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+} regexp_matches_ctx;
+
+typedef struct regexp_split_ctx
+{
+ text *orig_str;
+ size_t orig_len;
+ pg_wchar *wide_str;
+ size_t wide_len;
+ regex_t *cpattern;
+ regmatch_t match;
+ size_t offset;
+ re_comp_flags flags;
+} regexp_split_ctx;
+
+
static int num_res = 0; /* # of cached re's */
static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
+static regexp_matches_ctx *setup_regexp_matches(FunctionCallInfo fcinfo,
+ text *orig_str, text *pattern,
+ text *flags);
+static ArrayType *perform_regexp_matches(regexp_matches_ctx *matchctx);
+
+static regexp_split_ctx *setup_regexp_split(text *str, text *pattern,
+ text *flags);
+static Datum get_next_split(regexp_split_ctx *splitctx);
+
/*
* RE_compile_and_cache - compile a RE, caching if possible
@@ -88,7 +138,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
* cflags --- compile options for the pattern
*
* Pattern is given in the database encoding. We internally convert to
- * array of pg_wchar which is what Spencer's regex package wants.
+ * an array of pg_wchar, which is what Spencer's regex package wants.
*/
static regex_t *
RE_compile_and_cache(text *text_re, int cflags)
@@ -191,48 +241,36 @@ RE_compile_and_cache(text *text_re, int cflags)
}
/*
- * RE_compile_and_execute - compile and execute a RE
+ * RE_wchar_execute - execute a RE
*
* Returns TRUE on match, FALSE on no match
*
- * text_re --- the pattern, expressed as an *untoasted* TEXT object
- * dat --- the data to match against (need not be null-terminated)
- * dat_len --- the length of the data string
- * cflags --- compile options for the pattern
+ * re --- the compiled pattern as returned by RE_compile_and_cache
+ * data --- the data to match against (need not be null-terminated)
+ * data_len --- the length of the data string
+ * start_search -- the offset in the data to start searching
* nmatch, pmatch --- optional return area for match details
*
- * Both pattern and data are given in the database encoding. We internally
- * convert to array of pg_wchar which is what Spencer's regex package wants.
+ * Data is given as array of pg_wchar which is what Spencer's regex package
+ * wants.
*/
static bool
-RE_compile_and_execute(text *text_re, char *dat, int dat_len,
- int cflags, int nmatch, regmatch_t *pmatch)
+RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
+ size_t start_search, int nmatch, regmatch_t *pmatch)
{
- pg_wchar *data;
- size_t data_len;
int regexec_result;
- regex_t *re;
char errMsg[100];
- /* Convert data string to wide characters */
- data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
- data_len = pg_mb2wchar_with_len(dat, data, dat_len);
-
- /* Compile RE */
- re = RE_compile_and_cache(text_re, cflags);
-
/* Perform RE match and return result */
regexec_result = pg_regexec(re,
data,
data_len,
- 0,
+ start_search,
NULL, /* no details */
nmatch,
pmatch,
0);
- pfree(data);
-
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
{
/* re failed??? */
@@ -245,13 +283,116 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
return (regexec_result == REG_OKAY);
}
+/*
+ * RE_execute - execute a RE
+ *
+ * Returns TRUE on match, FALSE on no match
+ *
+ * re --- the compiled pattern as returned by RE_compile_and_cache
+ * dat --- the data to match against (need not be null-terminated)
+ * dat_len --- the length of the data string
+ * nmatch, pmatch --- optional return area for match details
+ *
+ * Data is given in the database encoding. We internally
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
+ */
+static bool
+RE_execute(regex_t *re, char *dat, int dat_len,
+ int nmatch, regmatch_t *pmatch)
+{
+ pg_wchar *data;
+ size_t data_len;
+ bool match;
+
+ /* Convert data string to wide characters */
+ data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
+ data_len = pg_mb2wchar_with_len(dat, data, dat_len);
+
+ /* Perform RE match and return result */
+ match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
+ pfree(data);
+ return match;
+}
+
+/*
+ * RE_compile_and_execute - compile and execute a RE
+ *
+ * Returns TRUE on match, FALSE on no match
+ *
+ * text_re --- the pattern, expressed as an *untoasted* TEXT object
+ * dat --- the data to match against (need not be null-terminated)
+ * dat_len --- the length of the data string
+ * cflags --- compile options for the pattern
+ * nmatch, pmatch --- optional return area for match details
+ *
+ * Both pattern and data are given in the database encoding. We internally
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
+ */
+static bool
+RE_compile_and_execute(text *text_re, char *dat, int dat_len,
+ int cflags, int nmatch, regmatch_t *pmatch)
+{
+ regex_t *re;
+
+ /* Compile RE */
+ re = RE_compile_and_cache(text_re, cflags);
+
+ return RE_execute(re, dat, dat_len, nmatch, pmatch);
+}
+
+static void
+parse_re_comp_flags(re_comp_flags *flags, text *opts)
+{
+ MemSet(flags, 0, sizeof(re_comp_flags));
+ flags->cflags = regex_flavor;
+
+ if (opts)
+ {
+ char *opt_p = VARDATA(opts);
+ size_t opt_len = VARSIZE(opts) - VARHDRSZ;
+ int i;
+
+ for (i = 0; i < opt_len; i++)
+ {
+ switch (opt_p[i])
+ {
+ case 'g':
+ flags->glob = true;
+ break;
+ case 'i':
+ flags->cflags |= REG_ICASE;
+ break;
+ case 'm':
+ case 'n':
+ flags->cflags |= REG_NEWLINE;
+ break;
+ case 'p':
+ flags->cflags |= REG_NLSTOP;
+ flags->cflags &= ~REG_NLANCH;
+ break;
+ case 'w':
+ flags->cflags &= ~REG_NLSTOP;
+ flags->cflags |= REG_NLANCH;
+ break;
+ case 'x':
+ flags->cflags |= REG_EXPANDED;
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid regexp option: %c", opt_p[i])));
+ break;
+ }
+ }
+ }
+}
+
/*
* assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR
*/
const char *
-assign_regex_flavor(const char *value,
- bool doit, GucSource source)
+assign_regex_flavor(const char *value, bool doit, GucSource source)
{
if (pg_strcasecmp(value, "advanced") == 0)
{
@@ -469,39 +610,14 @@ textregexreplace(PG_FUNCTION_ARGS)
text *p = PG_GETARG_TEXT_P(1);
text *r = PG_GETARG_TEXT_P(2);
text *opt = PG_GETARG_TEXT_P(3);
- char *opt_p = VARDATA(opt);
- int opt_len = (VARSIZE(opt) - VARHDRSZ);
- int i;
- bool glob = false;
- bool ignorecase = false;
regex_t *re;
+ re_comp_flags flags;
- /* parse options */
- for (i = 0; i < opt_len; i++)
- {
- switch (opt_p[i])
- {
- case 'i':
- ignorecase = true;
- break;
- case 'g':
- glob = true;
- break;
- default:
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("invalid option of regexp_replace: %c",
- opt_p[i])));
- break;
- }
- }
+ parse_re_comp_flags(&flags, opt);
- if (ignorecase)
- re = RE_compile_and_cache(p, regex_flavor | REG_ICASE);
- else
- re = RE_compile_and_cache(p, regex_flavor);
+ re = RE_compile_and_cache(p, flags.cflags);
- PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, glob));
+ PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
}
/* similar_escape()
@@ -625,6 +741,361 @@ similar_escape(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
+#define PG_GETARG_TEXT_P_IF_EXISTS(_n) \
+ (PG_NARGS() > _n ? PG_GETARG_TEXT_P(_n) : NULL)
+
+Datum
+regexp_matches(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ MemoryContext oldcontext;
+ regexp_matches_ctx *matchctx;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *pattern = PG_GETARG_TEXT_P(1);
+ text *flags = PG_GETARG_TEXT_P_IF_EXISTS(2);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* be sure to copy the input string into the multi-call ctx */
+ matchctx = setup_regexp_matches(fcinfo, PG_GETARG_TEXT_P_COPY(0),
+ pattern, flags);
+
+ MemoryContextSwitchTo(oldcontext);
+ funcctx->user_fctx = (void *) matchctx;
+
+ /*
+ * Avoid run-away function by making sure we never iterate
+ * more than the length of the text + 1 (the number of matches
+ * an empty pattern will make is length + 1)
+ */
+ if (matchctx->flags.glob)
+ funcctx->max_calls = matchctx->wide_len + 1;
+ else
+ funcctx->max_calls = 0;
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
+
+ if (funcctx->call_cntr > funcctx->max_calls)
+ {
+ /*
+ * If max_calls == 0, then we are doing a non-global match, we
+ * should stop now, no problem. Otherwise, if we exceed
+ * max_calls something really wonky is going on, since it is
+ * returning more matches than there are characters in the
+ * string, which should not happen
+ */
+ if (funcctx->max_calls != 0)
+ elog(ERROR, "set returning match function terminated after iterating %d times",
+ funcctx->call_cntr);
+
+ SRF_RETURN_DONE(funcctx);
+ }
+
+ if (matchctx->offset < matchctx->wide_len)
+ {
+ ArrayType *result_ary;
+
+ if (matchctx->pmatch[0].rm_so == matchctx->pmatch[0].rm_eo)
+ matchctx->offset++;
+
+ result_ary = perform_regexp_matches(matchctx);
+ if (result_ary != NULL)
+ {
+ matchctx->offset = matchctx->pmatch[0].rm_eo;
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
+ }
+ /* else fall through and return done */
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+Datum
+regexp_matches_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_matches(fcinfo);
+}
+
+static regexp_matches_ctx *
+setup_regexp_matches(FunctionCallInfo fcinfo, text *orig_str, text *pattern, text *flags)
+{
+ regexp_matches_ctx *matchctx = palloc(sizeof(regexp_matches_ctx));
+
+ matchctx->orig_str = orig_str;
+ matchctx->orig_len = VARSIZE(matchctx->orig_str) - VARHDRSZ;
+
+ parse_re_comp_flags(&matchctx->flags, flags);
+
+ matchctx->cpattern = RE_compile_and_cache(pattern, matchctx->flags.cflags);
+ matchctx->pmatch = palloc(sizeof(regmatch_t) * (matchctx->cpattern->re_nsub + 1));
+ matchctx->offset = 0;
+
+ /* get text type oid, too lazy to do it some other way */
+ matchctx->param_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ get_typlenbyvalalign(matchctx->param_type, &matchctx->typlen,
+ &matchctx->typbyval, &matchctx->typalign);
+
+ matchctx->wide_str = palloc(sizeof(pg_wchar) * (matchctx->orig_len + 1));
+ matchctx->wide_len = pg_mb2wchar_with_len(VARDATA(matchctx->orig_str),
+ matchctx->wide_str, matchctx->orig_len);
+
+ matchctx->pmatch[0].rm_so = -1;
+ /* both < 0 but not equal */
+ matchctx->pmatch[0].rm_eo = -2;
+
+ return matchctx;
+}
+
+static ArrayType *
+perform_regexp_matches(regexp_matches_ctx *matchctx)
+{
+ Datum *elems;
+ bool *nulls;
+ Datum fullmatch; /* used to avoid a palloc if no matches */
+ int ndims = 1;
+ int dims[1];
+ int lbs[1] = {1};
+
+ if (RE_wchar_execute(matchctx->cpattern,
+ matchctx->wide_str,
+ matchctx->wide_len,
+ matchctx->offset,
+ matchctx->cpattern->re_nsub + 1,
+ matchctx->pmatch) == false)
+ return NULL;
+
+ if (matchctx->cpattern->re_nsub > 0)
+ {
+ int i;
+
+ elems = palloc(sizeof(Datum) * matchctx->cpattern->re_nsub);
+ nulls = palloc(sizeof(bool) * matchctx->cpattern->re_nsub);
+ dims[0] = matchctx->cpattern->re_nsub;
+
+ for (i = 0; i < matchctx->cpattern->re_nsub; i++)
+ {
+ int so = matchctx->pmatch[i + 1].rm_so;
+ int eo = matchctx->pmatch[i + 1].rm_eo;
+
+ if (so < 0 || eo < 0)
+ {
+ elems[i] = 0;
+ nulls[i] = true;
+ }
+ else
+ {
+ elems[i] = DirectFunctionCall3(text_substr,
+ PointerGetDatum(matchctx->orig_str),
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so));
+ nulls[i] = false;
+ }
+ }
+ }
+ else
+ {
+ int so = matchctx->pmatch[0].rm_so;
+ int eo = matchctx->pmatch[0].rm_eo;
+
+ if (so < 0 || eo < 0)
+ elog(ERROR, "regexp code said it had a match, but did not return it");
+
+ fullmatch = DirectFunctionCall3(text_substr,
+ PointerGetDatum(matchctx->orig_str),
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so));
+
+ elems = &fullmatch;
+ nulls = NULL;
+ dims[0] = 1;
+ }
+
+ return construct_md_array(elems, nulls, ndims, dims, lbs,
+ matchctx->param_type, matchctx->typlen,
+ matchctx->typbyval, matchctx->typalign);
+}
+
+Datum
+regexp_split_to_table(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ regexp_split_ctx *splitctx;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *pattern = PG_GETARG_TEXT_P(1);
+ text *flags = PG_GETARG_TEXT_P_IF_EXISTS(2);
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ splitctx = setup_regexp_split(PG_GETARG_TEXT_P_COPY(0), pattern, flags);
+
+ MemoryContextSwitchTo(oldcontext);
+ funcctx->user_fctx = (void *) splitctx;
+
+ /*
+ * Avoid run-away function by making sure we never iterate
+ * more than the length of the text
+ */
+ funcctx->max_calls = splitctx->wide_len;
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ splitctx = (regexp_split_ctx *) funcctx->user_fctx;
+
+ if (funcctx->call_cntr > funcctx->max_calls)
+ {
+ /*
+ * If we exceed wide_len something really wonky is going on,
+ * since it is returning more matches than there are
+ * characters in the string, which should not happen
+ */
+ elog(ERROR, "set returning split function terminated after iterating %d times",
+ funcctx->call_cntr);
+ }
+
+ if (splitctx->offset < splitctx->wide_len)
+ SRF_RETURN_NEXT(funcctx, get_next_split(splitctx));
+ else
+ SRF_RETURN_DONE(funcctx);
+}
+
+Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_split_to_table(fcinfo);
+}
+
+Datum regexp_split_to_array(PG_FUNCTION_ARGS)
+{
+ ArrayBuildState *astate = NULL;
+ regexp_split_ctx *splitctx;
+ Oid param_type;
+ int nitems;
+
+ splitctx = setup_regexp_split(PG_GETARG_TEXT_P(0),
+ PG_GETARG_TEXT_P(1),
+ PG_GETARG_TEXT_P_IF_EXISTS(2));
+
+ /* get text type oid, too lazy to do it some other way */
+ param_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
+
+ for (nitems = 0; splitctx->offset < splitctx->wide_len; nitems++)
+ {
+ if (nitems > splitctx->wide_len)
+ elog(ERROR, "split function terminated after iterating %d times",
+ nitems);
+
+ astate = accumArrayResult(astate,
+ get_next_split(splitctx),
+ false,
+ param_type,
+ CurrentMemoryContext);
+ }
+
+ PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_split_to_array(fcinfo);
+}
+
+static regexp_split_ctx *
+setup_regexp_split(text *str, text *pattern, text *flags)
+{
+ regexp_split_ctx *splitctx = palloc(sizeof(regexp_split_ctx));
+
+ splitctx->orig_str = str;
+ splitctx->orig_len = VARSIZE(splitctx->orig_str) - VARHDRSZ;
+
+ parse_re_comp_flags(&splitctx->flags, flags);
+ if (splitctx->flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("regexp_split does not support the global option")));
+
+ splitctx->cpattern = RE_compile_and_cache(pattern, splitctx->flags.cflags);
+
+ splitctx->wide_str = palloc(sizeof(pg_wchar) * (splitctx->orig_len + 1));
+ splitctx->wide_len = pg_mb2wchar_with_len(VARDATA(splitctx->orig_str),
+ splitctx->wide_str,
+ splitctx->orig_len);
+
+ splitctx->offset = 0;
+
+ splitctx->match.rm_so = -1;
+ /* both < 0 but not equal */
+ splitctx->match.rm_eo = -2;
+
+ return splitctx;
+}
+
+static Datum
+get_next_split(regexp_split_ctx *splitctx)
+{
+ regmatch_t *pmatch = &(splitctx->match);
+
+ for (;;)
+ {
+ Datum result;
+ int startpos = splitctx->offset + 1;
+
+ /*
+ * If the last match was zero-length, we need to push the
+ * offset forward to avoid matching the same place forever
+ */
+ if (pmatch->rm_so == pmatch->rm_eo)
+ splitctx->offset++;
+
+ if (RE_wchar_execute(splitctx->cpattern,
+ splitctx->wide_str,
+ splitctx->wide_len,
+ splitctx->offset,
+ 1,
+ pmatch))
+ {
+ int length = splitctx->match.rm_so - startpos + 1;
+
+ /*
+ * If we are trying to match at the beginning of the string and
+ * we got a zero-length match, or if we just matched where we
+ * left off last time, go around the loop again and increment
+ * the offset. If we have incremented the offset already and
+ * it matched at the new offset, that's ok
+ */
+ if (length == 0)
+ continue;
+
+ result = DirectFunctionCall3(text_substr,
+ PointerGetDatum(splitctx->orig_str),
+ Int32GetDatum(startpos),
+ Int32GetDatum(length));
+
+ /* set the offset to the end of this match for next time */
+ splitctx->offset = pmatch->rm_eo;
+
+ return result;
+ }
+
+ /* no more matches, return rest of string */
+ result = DirectFunctionCall2(text_substr_no_len,
+ PointerGetDatum(splitctx->orig_str),
+ Int32GetDatum(startpos));
+
+ /* so we know we're done next time through */
+ splitctx->offset = splitctx->wide_len;
+
+ return result;
+ }
+}
+
/*
* report whether regex_flavor is currently BASIC
*/
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 2327e1cab5e..2bd1eda199d 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.392 2007/03/20 03:53:26 wieck Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.393 2007/03/20 05:45:00 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200703190
+#define CATALOG_VERSION_NO 200703201
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 6148702f5ee..8007129a1bb 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.448 2007/03/16 17:57:36 mha Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.449 2007/03/20 05:45:00 neilc Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@@ -2261,8 +2261,20 @@ DATA(insert OID = 2284 ( regexp_replace PGNSP PGUID 12 1 0 f f t f i 3 25 "2
DESCR("replace text using regexp");
DATA(insert OID = 2285 ( regexp_replace PGNSP PGUID 12 1 0 f f t f i 4 25 "25 25 25 25" _null_ _null_ _null_ textregexreplace - _null_ ));
DESCR("replace text using regexp");
+DATA(insert OID = 2763 ( regexp_matches PGNSP PGUID 12 1 1 f f t t i 2 1009 "25 25" _null_ _null_ _null_ regexp_matches_no_flags - _null_ ));
+DESCR("return all match groups for regexp");
+DATA(insert OID = 2764 ( regexp_matches PGNSP PGUID 12 1 10 f f t t i 3 1009 "25 25 25" _null_ _null_ _null_ regexp_matches - _null_ ));
+DESCR("return all match groups for regexp");
DATA(insert OID = 2088 ( split_part PGNSP PGUID 12 1 0 f f t f i 3 25 "25 25 23" _null_ _null_ _null_ split_text - _null_ ));
DESCR("split string by field_sep and return field_num");
+DATA(insert OID = 2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 f f t t i 2 25 "25 25" _null_ _null_ _null_ regexp_split_to_table_no_flags - _null_ ));
+DESCR("split string by pattern");
+DATA(insert OID = 2766 ( regexp_split_to_table PGNSP PGUID 12 1 1000 f f t t i 3 25 "25 25 25" _null_ _null_ _null_ regexp_split_to_table - _null_ ));
+DESCR("split string by pattern");
+DATA(insert OID = 2767 ( regexp_split_to_array PGNSP PGUID 12 1 0 f f t f i 2 1009 "25 25" _null_ _null_ _null_ regexp_split_to_array_no_flags - _null_ ));
+DESCR("split string by pattern");
+DATA(insert OID = 2768 ( regexp_split_to_array PGNSP PGUID 12 1 0 f f t f i 3 1009 "25 25 25" _null_ _null_ _null_ regexp_split_to_array - _null_ ));
+DESCR("split string by pattern");
DATA(insert OID = 2089 ( to_hex PGNSP PGUID 12 1 0 f f t f i 1 25 "23" _null_ _null_ _null_ to_hex32 - _null_ ));
DESCR("convert int4 number to hex");
DATA(insert OID = 2090 ( to_hex PGNSP PGUID 12 1 0 f f t f i 1 25 "20" _null_ _null_ _null_ to_hex64 - _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 70c2142d8d4..b2a55bbbb98 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.289 2007/02/23 21:59:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.290 2007/03/20 05:45:00 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@@ -478,6 +478,12 @@ extern Datum textregexsubstr(PG_FUNCTION_ARGS);
extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
extern Datum textregexreplace(PG_FUNCTION_ARGS);
extern Datum similar_escape(PG_FUNCTION_ARGS);
+extern Datum regexp_matches(PG_FUNCTION_ARGS);
+extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS);
+extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
+extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
+extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
+extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS);
extern bool regex_flavor_is_basic(void);
/* regproc.c */
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 20081f699b6..e11dfc35d78 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -217,9 +217,232 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
Z Z
(1 row)
--- invalid option of REGEXP_REPLACE
+-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
-ERROR: invalid option of regexp_replace: z
+ERROR: invalid regexp option: z
+-- set so we can tell NULL from empty string
+\pset null '\\N'
+-- return all matches from regexp
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+ regexp_matches
+----------------
+ {bar,beque}
+(1 row)
+
+-- test case insensitive
+SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
+ regexp_matches
+----------------
+ {bAR,bEqUE}
+(1 row)
+
+-- global option - more than one match
+SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+ regexp_matches
+----------------
+ {bar,beque}
+ {bazil,barf}
+(2 rows)
+
+-- empty capture group (matched empty string)
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
+ regexp_matches
+----------------
+ {bar,"",beque}
+(1 row)
+
+-- no match
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
+ regexp_matches
+----------------
+(0 rows)
+
+-- optional capture group did not match, null entry in array
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
+ regexp_matches
+------------------
+ {bar,NULL,beque}
+(1 row)
+
+-- no capture groups
+SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+ regexp_matches
+----------------
+ {barbeque}
+(1 row)
+
+-- give me errors
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
+ERROR: invalid regexp option: z
+SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
+ERROR: invalid regular expression: parentheses () not balanced
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
+ERROR: invalid regular expression: invalid repetition count(s)
+-- split string on regexp
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+ foo | length
+--------+--------
+ the | 3
+ quick | 5
+ brown | 5
+ fox | 3
+ jumped | 6
+ over | 4
+ the | 3
+ lazy | 4
+ dog | 3
+(9 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
+ regexp_split_to_array
+------------------------------------------------
+ {the,quick,brown,fox,jumped,over,the,lazy,dog}
+(1 row)
+
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
+ foo | length
+-----+--------
+ t | 1
+ h | 1
+ e | 1
+ q | 1
+ u | 1
+ i | 1
+ c | 1
+ k | 1
+ b | 1
+ r | 1
+ o | 1
+ w | 1
+ n | 1
+ f | 1
+ o | 1
+ x | 1
+ j | 1
+ u | 1
+ m | 1
+ p | 1
+ e | 1
+ d | 1
+ o | 1
+ v | 1
+ e | 1
+ r | 1
+ t | 1
+ h | 1
+ e | 1
+ l | 1
+ a | 1
+ z | 1
+ y | 1
+ d | 1
+ o | 1
+ g | 1
+(36 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$);
+ regexp_split_to_array
+---------------------------------------------------------------------------
+ {t,h,e,q,u,i,c,k,b,r,o,w,n,f,o,x,j,u,m,p,e,d,o,v,e,r,t,h,e,l,a,z,y,d,o,g}
+(1 row)
+
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo;
+ foo | length
+-----+--------
+ t | 1
+ h | 1
+ e | 1
+ | 1
+ q | 1
+ u | 1
+ i | 1
+ c | 1
+ k | 1
+ | 1
+ b | 1
+ r | 1
+ o | 1
+ w | 1
+ n | 1
+ | 1
+ f | 1
+ o | 1
+ x | 1
+ | 1
+ j | 1
+ u | 1
+ m | 1
+ p | 1
+ e | 1
+ d | 1
+ | 1
+ o | 1
+ v | 1
+ e | 1
+ r | 1
+ | 1
+ t | 1
+ h | 1
+ e | 1
+ | 1
+ l | 1
+ a | 1
+ z | 1
+ y | 1
+ | 1
+ d | 1
+ o | 1
+ g | 1
+(44 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', '');
+ regexp_split_to_array
+-----------------------------------------------------------------------------------------------------------
+ {t,h,e," ",q,u,i,c,k," ",b,r,o,w,n," ",f,o,x," ",j,u,m,p,e,d," ",o,v,e,r," ",t,h,e," ",l,a,z,y," ",d,o,g}
+(1 row)
+
+-- case insensitive
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
+ foo | length
+-----------------------+--------
+ th | 2
+ QUick bROWn FOx jUMP | 21
+ d ov | 4
+ r TH | 4
+ lazy dOG | 9
+(5 rows)
+
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i');
+ regexp_split_to_array
+--------------------------------------------------------
+ {th," QUick bROWn FOx jUMP","d ov","r TH"," lazy dOG"}
+(1 row)
+
+-- no match of pattern
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo;
+ foo | length
+----------------------------------------------+--------
+ the quick brown fox jumped over the lazy dog | 44
+(1 row)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch');
+ regexp_split_to_array
+--------------------------------------------------
+ {"the quick brown fox jumped over the lazy dog"}
+(1 row)
+
+-- errors
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
+ERROR: invalid regexp option: z
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy');
+ERROR: invalid regexp option: z
+-- global option meaningless for regexp_split
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
+ERROR: regexp_split does not support the global option
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g');
+ERROR: regexp_split does not support the global option
+-- change NULL-display back
+\pset null ''
-- E021-11 position expression
SELECT POSITION('4' IN '1234567890') = '4' AS "4";
4
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 570d9a27f46..389ff63517f 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -85,9 +85,60 @@ SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3
SELECT regexp_replace('AAA BBB CCC ', E'\\s+', ' ', 'g');
SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
--- invalid option of REGEXP_REPLACE
+-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
+-- set so we can tell NULL from empty string
+\pset null '\\N'
+
+-- return all matches from regexp
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+
+-- test case insensitive
+SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
+
+-- global option - more than one match
+SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+
+-- empty capture group (matched empty string)
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
+-- no match
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
+-- optional capture group did not match, null entry in array
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
+
+-- no capture groups
+SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+
+-- give me errors
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
+SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
+
+-- split string on regexp
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
+
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$);
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', '');
+-- case insensitive
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i');
+-- no match of pattern
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch');
+-- errors
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy');
+-- global option meaningless for regexp_split
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g');
+
+-- change NULL-display back
+\pset null ''
+
-- E021-11 position expression
SELECT POSITION('4' IN '1234567890') = '4' AS "4";