about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/regex/regcomp.c 130
-rw-r--r-- src/test/modules/test_regex/expected/test_regex.out 8
-rw-r--r-- src/test/modules/test_regex/sql/test_regex.sql 2
3 files changed, 73 insertions, 67 deletions
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 9f71177d318..3d7f11af8c9 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -43,7 +43,7 @@ static int freev(struct vars *, int);
static void makesearch(struct vars *, struct nfa *);
static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
-static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
+static struct subre *parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
static void nonword(struct vars *, int, struct state *, struct state *);
static void word(struct vars *, int, struct state *, struct state *);
static void charclass(struct vars *, enum char_classes,
@@ -756,7 +756,7 @@ parsebranch(struct vars *v,
seencontent = 1;
/* NB, recursion in parseqatom() may swallow rest of branch */
- parseqatom(v, stopper, type, lp, right, t);
+ t = parseqatom(v, stopper, type, lp, right, t);
NOERRN();
}
@@ -777,8 +777,12 @@ parsebranch(struct vars *v,
* The bookkeeping near the end cooperates very closely with parsebranch();
* in particular, it contains a recursion that can involve parsing the rest
* of the branch, making this function's name somewhat inaccurate.
+ *
+ * Usually, the return value is just "top", but in some cases where we
+ * have parsed the rest of the branch, we may deem "top" redundant and
+ * free it, returning some child subre instead.
*/
-static void
+static struct subre *
parseqatom(struct vars *v,
int stopper, /* EOS or ')' */
int type, /* LACON (lookaround subRE) or PLAIN */
@@ -818,84 +822,84 @@ parseqatom(struct vars *v,
if (v->cflags & REG_NLANCH)
ARCV(BEHIND, v->nlcolor);
NEXT();
- return;
+ return top;
break;
case '$':
ARCV('$', 1);
if (v->cflags & REG_NLANCH)
ARCV(AHEAD, v->nlcolor);
NEXT();
- return;
+ return top;
break;
case SBEGIN:
ARCV('^', 1); /* BOL */
ARCV('^', 0); /* or BOS */
NEXT();
- return;
+ return top;
break;
case SEND:
ARCV('$', 1); /* EOL */
ARCV('$', 0); /* or EOS */
NEXT();
- return;
+ return top;
break;
case '<':
wordchrs(v);
s = newstate(v->nfa);
- NOERR();
+ NOERRN();
nonword(v, BEHIND, lp, s);
word(v, AHEAD, s, rp);
NEXT();
- return;
+ return top;
break;
case '>':
wordchrs(v);
s = newstate(v->nfa);
- NOERR();
+ NOERRN();
word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
NEXT();
- return;
+ return top;
break;
case WBDRY:
wordchrs(v);
s = newstate(v->nfa);
- NOERR();
+ NOERRN();
nonword(v, BEHIND, lp, s);
word(v, AHEAD, s, rp);
s = newstate(v->nfa);
- NOERR();
+ NOERRN();
word(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
NEXT();
- return;
+ return top;
break;
case NWBDRY:
wordchrs(v);
s = newstate(v->nfa);
- NOERR();
+ NOERRN();
word(v, BEHIND, lp, s);
word(v, AHEAD, s, rp);
s = newstate(v->nfa);
- NOERR();
+ NOERRN();
nonword(v, BEHIND, lp, s);
nonword(v, AHEAD, s, rp);
NEXT();
- return;
+ return top;
break;
case LACON: /* lookaround constraint */
latype = v->nextvalue;
NEXT();
s = newstate(v->nfa);
s2 = newstate(v->nfa);
- NOERR();
+ NOERRN();
t = parse(v, ')', LACON, s, s2);
freesubre(v, t); /* internal structure irrelevant */
- NOERR();
+ NOERRN();
assert(SEE(')'));
NEXT();
processlacon(v, s, s2, latype, lp, rp);
- return;
+ return top;
break;
/* then errors, to get them out of the way */
case '*':
@@ -903,18 +907,18 @@ parseqatom(struct vars *v,
case '?':
case '{':
ERR(REG_BADRPT);
- return;
+ return top;
break;
default:
ERR(REG_ASSERT);
- return;
+ return top;
break;
/* then plain characters, and minor variants on that theme */
case ')': /* unbalanced paren */
if ((v->cflags & REG_ADVANCED) != REG_EXTENDED)
{
ERR(REG_EPAREN);
- return;
+ return top;
}
/* legal in EREs due to specification botch */
NOTE(REG_UPBOTCH);
@@ -923,7 +927,7 @@ parseqatom(struct vars *v,
case PLAIN:
onechr(v, v->nextvalue, lp, rp);
okcolors(v->nfa, v->cm);
- NOERR();
+ NOERRN();
NEXT();
break;
case '[':
@@ -972,14 +976,14 @@ parseqatom(struct vars *v,
*/
s = newstate(v->nfa);
s2 = newstate(v->nfa);
- NOERR();
+ NOERRN();
EMPTYARC(lp, s);
EMPTYARC(s2, rp);
- NOERR();
+ NOERRN();
atom = parse(v, ')', type, s, s2);
assert(SEE(')') || ISERR());
NEXT();
- NOERR();
+ NOERRN();
if (cap)
{
assert(v->subs[subno] == NULL);
@@ -994,7 +998,7 @@ parseqatom(struct vars *v,
{
/* generate no-op wrapper node to handle "((x))" */
t = subre(v, '(', atom->flags | CAP, lp, rp);
- NOERR();
+ NOERRN();
t->capno = subno;
t->child = atom;
atom = t;
@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v,
INSIST(type != LACON, REG_ESUBREG);
INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
- NOERR();
+ NOERRN();
assert(v->nextvalue > 0);
atom = subre(v, 'b', BACKR, lp, rp);
- NOERR();
+ NOERRN();
subno = v->nextvalue;
atom->backno = subno;
EMPTYARC(lp, rp); /* temporarily, so there's something */
@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v,
if (m > n)
{
ERR(REG_BADBR);
- return;
+ return top;
}
/* {m,n} exercises preference, even if it's {m,m} */
qprefer = (v->nextvalue) ? LONGER : SHORTER;
@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v,
if (!SEE('}'))
{ /* catches errors too */
ERR(REG_BADBR);
- return;
+ return top;
}
NEXT();
break;
@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v,
v->subs[subno] = NULL;
delsub(v->nfa, lp, rp);
EMPTYARC(lp, rp);
- return;
+ return top;
}
/* if not a messy case, avoid hard part */
@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v,
if (atom != NULL)
freesubre(v, atom);
top->flags = f;
- return;
+ return top;
}
/*
@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v,
if (atom == NULL)
{
atom = subre(v, '=', 0, lp, rp);
- NOERR();
+ NOERRN();
}
/*----------
@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v,
*/
s = newstate(v->nfa); /* first, new endpoints for the atom */
s2 = newstate(v->nfa);
- NOERR();
+ NOERRN();
moveouts(v->nfa, lp, s);
moveins(v->nfa, rp, s2);
- NOERR();
+ NOERRN();
atom->begin = s;
atom->end = s2;
s = newstate(v->nfa); /* set up starting state */
- NOERR();
+ NOERRN();
EMPTYARC(lp, s);
- NOERR();
+ NOERRN();
/* break remaining subRE into x{...} and what follows */
t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
- NOERR();
+ NOERRN();
t->child = atom;
atomp = &t->child;
@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v,
*/
assert(top->op == '=' && top->child == NULL);
top->child = subre(v, '=', top->flags, top->begin, lp);
- NOERR();
+ NOERRN();
top->op = '.';
top->child->sibling = t;
/* top->flags will get updated later */
@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v,
*/
dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
atom->begin, atom->end);
- NOERR();
+ NOERRN();
/* The backref node's NFA should not enforce any constraints */
removeconstraints(v->nfa, atom->begin, atom->end);
- NOERR();
+ NOERRN();
}
/*
@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v,
repeat(v, atom->begin, atom->end, m, n);
f = COMBINE(qprefer, atom->flags);
t = subre(v, '=', f, atom->begin, atom->end);
- NOERR();
+ NOERRN();
freesubre(v, atom);
*atomp = t;
/* rest of branch can be strung starting from t->end */
@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v,
repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1);
f = COMBINE(qprefer, atom->flags);
t = subre(v, '.', f, s, atom->end); /* prefix and atom */
- NOERR();
+ NOERRN();
t->child = subre(v, '=', PREF(f), s, atom->begin);
- NOERR();
+ NOERRN();
t->child->sibling = atom;
*atomp = t;
/* rest of branch can be strung starting from atom->end */
@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v,
{
/* general case: need an iteration node */
s2 = newstate(v->nfa);
- NOERR();
+ NOERRN();
moveouts(v->nfa, atom->end, s2);
- NOERR();
+ NOERRN();
dupnfa(v->nfa, atom->begin, atom->end, s, s2);
repeat(v, s, s2, m, n);
f = COMBINE(qprefer, atom->flags);
t = subre(v, '*', f, s, s2);
- NOERR();
+ NOERRN();
t->min = (short) m;
t->max = (short) n;
t->child = atom;
@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v,
{
/* parse all the rest of the branch, and insert in t->child->sibling */
t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1);
- NOERR();
+ NOERRN();
assert(SEE('|') || SEE(stopper) || SEE(EOS));
/* here's the promised update of the flags */
@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v,
*
* If the messy atom was the first thing in the branch, then
* top->child is vacuous and we can get rid of one level of
- * concatenation. Since the caller is holding a pointer to the top
- * node, we can't remove that node; but we're allowed to change its
- * properties.
+ * concatenation.
*/
assert(top->child->op == '=');
if (top->child->begin == top->child->end)
@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v,
{
assert(!MESSY(top->child->flags));
t = top->child->sibling;
- freesubre(v, top->child);
- top->op = t->op;
- top->flags = t->flags;
- top->latype = t->latype;
- top->id = t->id;
- top->capno = t->capno;
- top->backno = t->backno;
- top->min = t->min;
- top->max = t->max;
- top->child = t->child;
- top->begin = t->begin;
- top->end = t->end;
- freesrnode(v, t);
+ top->child->sibling = NULL;
+ freesubre(v, top);
+ top = t;
}
}
+
+ return top;
}
/*
@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v, /* might be NULL */
if (!NULLCNFA(sr->cnfa))
freecnfa(&sr->cnfa);
- sr->flags = 0;
+ sr->flags = 0; /* in particular, not INUSE */
+ sr->child = sr->sibling = NULL;
+ sr->begin = sr->end = NULL;
if (v != NULL && v->treechain != NULL)
{
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 01d50ec1e3f..44da7d20190 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -3468,6 +3468,14 @@ select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M'
{" TO foo",foo,o,NULL}
(2 rows)
+-- expectMatch 21.36 RPQ ((.))(\2){0} xy x x x {}
+select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
+ test_regex
+--------------------------------------------
+ {3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX}
+ {x,x,x,NULL}
+(2 rows)
+
-- doing 22 "multicharacter collating elements"
-- # again ugh
-- MCCEs are not implemented in Postgres, so we skip all these tests
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 7f5bc6e418f..9224fdfdd3a 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -1009,6 +1009,8 @@ select * from test_regex('(.*).*', 'abc', 'N');
select * from test_regex('(a*)*', 'bc', 'N');
-- expectMatch 21.35 M { TO (([a-z0-9._]+|"([^"]+|"")+")+)} {asd TO foo} { TO foo} foo o {}
select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M');
+-- expectMatch 21.36 RPQ ((.))(\2){0} xy x x x {}
+select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
-- doing 22 "multicharacter collating elements"
-- # again ugh