3 files changed, 73 insertions, 67 deletions
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 9f71177d318..3d7f11af8c9 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -43,7 +43,7 @@ static int	freev(struct vars *, int);
 static void makesearch(struct vars *, struct nfa *);
 static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
 static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
-static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
+static struct subre *parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
 static void nonword(struct vars *, int, struct state *, struct state *);
 static void word(struct vars *, int, struct state *, struct state *);
 static void charclass(struct vars *, enum char_classes,
@@ -756,7 +756,7 @@ parsebranch(struct vars *v,
 		seencontent = 1;
 
 		/* NB, recursion in parseqatom() may swallow rest of branch */
-		parseqatom(v, stopper, type, lp, right, t);
+		t = parseqatom(v, stopper, type, lp, right, t);
 		NOERRN();
 	}
 
@@ -777,8 +777,12 @@ parsebranch(struct vars *v,
  * The bookkeeping near the end cooperates very closely with parsebranch();
  * in particular, it contains a recursion that can involve parsing the rest
  * of the branch, making this function's name somewhat inaccurate.
+ *
+ * Usually, the return value is just "top", but in some cases where we
+ * have parsed the rest of the branch, we may deem "top" redundant and
+ * free it, returning some child subre instead.
  */
-static void
+static struct subre *
 parseqatom(struct vars *v,
 		   int stopper,			/* EOS or ')' */
 		   int type,			/* LACON (lookaround subRE) or PLAIN */
@@ -818,84 +822,84 @@ parseqatom(struct vars *v,
 			if (v->cflags & REG_NLANCH)
 				ARCV(BEHIND, v->nlcolor);
 			NEXT();
-			return;
+			return top;
 			break;
 		case '$':
 			ARCV('$', 1);
 			if (v->cflags & REG_NLANCH)
 				ARCV(AHEAD, v->nlcolor);
 			NEXT();
-			return;
+			return top;
 			break;
 		case SBEGIN:
 			ARCV('^', 1);		/* BOL */
 			ARCV('^', 0);		/* or BOS */
 			NEXT();
-			return;
+			return top;
 			break;
 		case SEND:
 			ARCV('$', 1);		/* EOL */
 			ARCV('$', 0);		/* or EOS */
 			NEXT();
-			return;
+			return top;
 			break;
 		case '<':
 			wordchrs(v);
 			s = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			nonword(v, BEHIND, lp, s);
 			word(v, AHEAD, s, rp);
 			NEXT();
-			return;
+			return top;
 			break;
 		case '>':
 			wordchrs(v);
 			s = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			word(v, BEHIND, lp, s);
 			nonword(v, AHEAD, s, rp);
 			NEXT();
-			return;
+			return top;
 			break;
 		case WBDRY:
 			wordchrs(v);
 			s = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			nonword(v, BEHIND, lp, s);
 			word(v, AHEAD, s, rp);
 			s = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			word(v, BEHIND, lp, s);
 			nonword(v, AHEAD, s, rp);
 			NEXT();
-			return;
+			return top;
 			break;
 		case NWBDRY:
 			wordchrs(v);
 			s = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			word(v, BEHIND, lp, s);
 			word(v, AHEAD, s, rp);
 			s = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			nonword(v, BEHIND, lp, s);
 			nonword(v, AHEAD, s, rp);
 			NEXT();
-			return;
+			return top;
 			break;
 		case LACON:				/* lookaround constraint */
 			latype = v->nextvalue;
 			NEXT();
 			s = newstate(v->nfa);
 			s2 = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			t = parse(v, ')', LACON, s, s2);
 			freesubre(v, t);	/* internal structure irrelevant */
-			NOERR();
+			NOERRN();
 			assert(SEE(')'));
 			NEXT();
 			processlacon(v, s, s2, latype, lp, rp);
-			return;
+			return top;
 			break;
 			/* then errors, to get them out of the way */
 		case '*':
@@ -903,18 +907,18 @@ parseqatom(struct vars *v,
 		case '?':
 		case '{':
 			ERR(REG_BADRPT);
-			return;
+			return top;
 			break;
 		default:
 			ERR(REG_ASSERT);
-			return;
+			return top;
 			break;
 			/* then plain characters, and minor variants on that theme */
 		case ')':				/* unbalanced paren */
 			if ((v->cflags & REG_ADVANCED) != REG_EXTENDED)
 			{
 				ERR(REG_EPAREN);
-				return;
+				return top;
 			}
 			/* legal in EREs due to specification botch */
 			NOTE(REG_UPBOTCH);
@@ -923,7 +927,7 @@ parseqatom(struct vars *v,
 		case PLAIN:
 			onechr(v, v->nextvalue, lp, rp);
 			okcolors(v->nfa, v->cm);
-			NOERR();
+			NOERRN();
 			NEXT();
 			break;
 		case '[':
@@ -972,14 +976,14 @@ parseqatom(struct vars *v,
 			 */
 			s = newstate(v->nfa);
 			s2 = newstate(v->nfa);
-			NOERR();
+			NOERRN();
 			EMPTYARC(lp, s);
 			EMPTYARC(s2, rp);
-			NOERR();
+			NOERRN();
 			atom = parse(v, ')', type, s, s2);
 			assert(SEE(')') || ISERR());
 			NEXT();
-			NOERR();
+			NOERRN();
 			if (cap)
 			{
 				assert(v->subs[subno] == NULL);
@@ -994,7 +998,7 @@ parseqatom(struct vars *v,
 				{
 					/* generate no-op wrapper node to handle "((x))" */
 					t = subre(v, '(', atom->flags | CAP, lp, rp);
-					NOERR();
+					NOERRN();
 					t->capno = subno;
 					t->child = atom;
 					atom = t;
@@ -1006,10 +1010,10 @@ parseqatom(struct vars *v,
 			INSIST(type != LACON, REG_ESUBREG);
 			INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
 			INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
-			NOERR();
+			NOERRN();
 			assert(v->nextvalue > 0);
 			atom = subre(v, 'b', BACKR, lp, rp);
-			NOERR();
+			NOERRN();
 			subno = v->nextvalue;
 			atom->backno = subno;
 			EMPTYARC(lp, rp);	/* temporarily, so there's something */
@@ -1050,7 +1054,7 @@ parseqatom(struct vars *v,
 				if (m > n)
 				{
 					ERR(REG_BADBR);
-					return;
+					return top;
 				}
 				/* {m,n} exercises preference, even if it's {m,m} */
 				qprefer = (v->nextvalue) ? LONGER : SHORTER;
@@ -1064,7 +1068,7 @@ parseqatom(struct vars *v,
 			if (!SEE('}'))
 			{					/* catches errors too */
 				ERR(REG_BADBR);
-				return;
+				return top;
 			}
 			NEXT();
 			break;
@@ -1083,7 +1087,7 @@ parseqatom(struct vars *v,
 			v->subs[subno] = NULL;
 		delsub(v->nfa, lp, rp);
 		EMPTYARC(lp, rp);
-		return;
+		return top;
 	}
 
 	/* if not a messy case, avoid hard part */
@@ -1096,7 +1100,7 @@ parseqatom(struct vars *v,
 		if (atom != NULL)
 			freesubre(v, atom);
 		top->flags = f;
-		return;
+		return top;
 	}
 
 	/*
@@ -1110,7 +1114,7 @@ parseqatom(struct vars *v,
 	if (atom == NULL)
 	{
 		atom = subre(v, '=', 0, lp, rp);
-		NOERR();
+		NOERRN();
 	}
 
 	/*----------
@@ -1131,20 +1135,20 @@ parseqatom(struct vars *v,
 	 */
 	s = newstate(v->nfa);		/* first, new endpoints for the atom */
 	s2 = newstate(v->nfa);
-	NOERR();
+	NOERRN();
 	moveouts(v->nfa, lp, s);
 	moveins(v->nfa, rp, s2);
-	NOERR();
+	NOERRN();
 	atom->begin = s;
 	atom->end = s2;
 	s = newstate(v->nfa);		/* set up starting state */
-	NOERR();
+	NOERRN();
 	EMPTYARC(lp, s);
-	NOERR();
+	NOERRN();
 
 	/* break remaining subRE into x{...} and what follows */
 	t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
-	NOERR();
+	NOERRN();
 	t->child = atom;
 	atomp = &t->child;
 
@@ -1163,7 +1167,7 @@ parseqatom(struct vars *v,
 	 */
 	assert(top->op == '=' && top->child == NULL);
 	top->child = subre(v, '=', top->flags, top->begin, lp);
-	NOERR();
+	NOERRN();
 	top->op = '.';
 	top->child->sibling = t;
 	/* top->flags will get updated later */
@@ -1182,11 +1186,11 @@ parseqatom(struct vars *v,
 		 */
 		dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
 			   atom->begin, atom->end);
-		NOERR();
+		NOERRN();
 
 		/* The backref node's NFA should not enforce any constraints */
 		removeconstraints(v->nfa, atom->begin, atom->end);
-		NOERR();
+		NOERRN();
 	}
 
 	/*
@@ -1226,7 +1230,7 @@ parseqatom(struct vars *v,
 		repeat(v, atom->begin, atom->end, m, n);
 		f = COMBINE(qprefer, atom->flags);
 		t = subre(v, '=', f, atom->begin, atom->end);
-		NOERR();
+		NOERRN();
 		freesubre(v, atom);
 		*atomp = t;
 		/* rest of branch can be strung starting from t->end */
@@ -1247,9 +1251,9 @@ parseqatom(struct vars *v,
 		repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1);
 		f = COMBINE(qprefer, atom->flags);
 		t = subre(v, '.', f, s, atom->end); /* prefix and atom */
-		NOERR();
+		NOERRN();
 		t->child = subre(v, '=', PREF(f), s, atom->begin);
-		NOERR();
+		NOERRN();
 		t->child->sibling = atom;
 		*atomp = t;
 		/* rest of branch can be strung starting from atom->end */
@@ -1259,14 +1263,14 @@ parseqatom(struct vars *v,
 	{
 		/* general case: need an iteration node */
 		s2 = newstate(v->nfa);
-		NOERR();
+		NOERRN();
 		moveouts(v->nfa, atom->end, s2);
-		NOERR();
+		NOERRN();
 		dupnfa(v->nfa, atom->begin, atom->end, s, s2);
 		repeat(v, s, s2, m, n);
 		f = COMBINE(qprefer, atom->flags);
 		t = subre(v, '*', f, s, s2);
-		NOERR();
+		NOERRN();
 		t->min = (short) m;
 		t->max = (short) n;
 		t->child = atom;
@@ -1280,7 +1284,7 @@ parseqatom(struct vars *v,
 	{
 		/* parse all the rest of the branch, and insert in t->child->sibling */
 		t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1);
-		NOERR();
+		NOERRN();
 		assert(SEE('|') || SEE(stopper) || SEE(EOS));
 
 		/* here's the promised update of the flags */
@@ -1299,9 +1303,7 @@ parseqatom(struct vars *v,
 		 *
 		 * If the messy atom was the first thing in the branch, then
 		 * top->child is vacuous and we can get rid of one level of
-		 * concatenation.  Since the caller is holding a pointer to the top
-		 * node, we can't remove that node; but we're allowed to change its
-		 * properties.
+		 * concatenation.
 		 */
 		assert(top->child->op == '=');
 		if (top->child->begin == top->child->end)
@@ -1351,21 +1353,13 @@ parseqatom(struct vars *v,
 		{
 			assert(!MESSY(top->child->flags));
 			t = top->child->sibling;
-			freesubre(v, top->child);
-			top->op = t->op;
-			top->flags = t->flags;
-			top->latype = t->latype;
-			top->id = t->id;
-			top->capno = t->capno;
-			top->backno = t->backno;
-			top->min = t->min;
-			top->max = t->max;
-			top->child = t->child;
-			top->begin = t->begin;
-			top->end = t->end;
-			freesrnode(v, t);
+			top->child->sibling = NULL;
+			freesubre(v, top);
+			top = t;
 		}
 	}
+
+	return top;
 }
 
 /*
@@ -2109,7 +2103,9 @@ freesrnode(struct vars *v,		/* might be NULL */
 
 	if (!NULLCNFA(sr->cnfa))
 		freecnfa(&sr->cnfa);
-	sr->flags = 0;
+	sr->flags = 0;				/* in particular, not INUSE */
+	sr->child = sr->sibling = NULL;
+	sr->begin = sr->end = NULL;
 
 	if (v != NULL && v->treechain != NULL)
 	{
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 01d50ec1e3f..44da7d20190 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -3468,6 +3468,14 @@ select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M'
  {" TO foo",foo,o,NULL}
 (2 rows)
 
+-- expectMatch	21.36 RPQ	((.))(\2){0}	xy	x	x	x	{}
+select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
+                 test_regex                 
+--------------------------------------------
+ {3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX}
+ {x,x,x,NULL}
+(2 rows)
+
 -- doing 22 "multicharacter collating elements"
 -- # again ugh
 -- MCCEs are not implemented in Postgres, so we skip all these tests
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 7f5bc6e418f..9224fdfdd3a 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -1009,6 +1009,8 @@ select * from test_regex('(.*).*', 'abc', 'N');
 select * from test_regex('(a*)*', 'bc', 'N');
 -- expectMatch	21.35 M		{ TO (([a-z0-9._]+|"([^"]+|"")+")+)}	{asd TO foo}	{ TO foo} foo o {}
 select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M');
+-- expectMatch	21.36 RPQ	((.))(\2){0}	xy	x	x	x	{}
+select * from test_regex('((.))(\2){0}', 'xy', 'RPQ');
 
 -- doing 22 "multicharacter collating elements"
 -- # again ugh