aboutsummaryrefslogtreecommitdiff
path: root/src/backend/regex/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/regex/regcomp.c')
-rw-r--r--src/backend/regex/regcomp.c222
1 files changed, 115 insertions, 107 deletions
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 0182e02db1b..c0680b280bc 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -58,6 +58,7 @@ static void processlacon(struct vars *, struct state *, struct state *, int,
struct state *, struct state *);
static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
static void freesubre(struct vars *, struct subre *);
+static void freesubreandsiblings(struct vars *, struct subre *);
static void freesrnode(struct vars *, struct subre *);
static void optst(struct vars *, struct subre *);
static int numst(struct subre *, int);
@@ -652,8 +653,8 @@ makesearch(struct vars *v,
* parse - parse an RE
*
* This is actually just the top level, which parses a bunch of branches
- * tied together with '|'. They appear in the tree as the left children
- * of a chain of '|' subres.
+ * tied together with '|'. If there's more than one, they appear in the
+ * tree as the children of a '|' subre.
*/
static struct subre *
parse(struct vars *v,
@@ -662,41 +663,34 @@ parse(struct vars *v,
struct state *init, /* initial state */
struct state *final) /* final state */
{
- struct state *left; /* scaffolding for branch */
- struct state *right;
struct subre *branches; /* top level */
- struct subre *branch; /* current branch */
- struct subre *t; /* temporary */
- int firstbranch; /* is this the first branch? */
+ struct subre *lastbranch; /* latest branch */
assert(stopper == ')' || stopper == EOS);
branches = subre(v, '|', LONGER, init, final);
NOERRN();
- branch = branches;
- firstbranch = 1;
+ lastbranch = NULL;
do
{ /* a branch */
- if (!firstbranch)
- {
- /* need a place to hang it */
- branch->right = subre(v, '|', LONGER, init, final);
- NOERRN();
- branch = branch->right;
- }
- firstbranch = 0;
+ struct subre *branch;
+ struct state *left; /* scaffolding for branch */
+ struct state *right;
+
left = newstate(v->nfa);
right = newstate(v->nfa);
NOERRN();
EMPTYARC(init, left);
EMPTYARC(right, final);
NOERRN();
- branch->left = parsebranch(v, stopper, type, left, right, 0);
+ branch = parsebranch(v, stopper, type, left, right, 0);
NOERRN();
- branch->flags |= UP(branch->flags | branch->left->flags);
- if ((branch->flags & ~branches->flags) != 0) /* new flags */
- for (t = branches; t != branch; t = t->right)
- t->flags |= branch->flags;
+ if (lastbranch)
+ lastbranch->sibling = branch;
+ else
+ branches->child = branch;
+ branches->flags |= UP(branches->flags | branch->flags);
+ lastbranch = branch;
} while (EAT('|'));
assert(SEE(stopper) || SEE(EOS));
@@ -707,20 +701,16 @@ parse(struct vars *v,
}
/* optimize out simple cases */
- if (branch == branches)
+ if (lastbranch == branches->child)
{ /* only one branch */
- assert(branch->right == NULL);
- t = branch->left;
- branch->left = NULL;
- freesubre(v, branches);
- branches = t;
+ assert(lastbranch->sibling == NULL);
+ freesrnode(v, branches);
+ branches = lastbranch;
}
else if (!MESSY(branches->flags))
{ /* no interesting innards */
- freesubre(v, branches->left);
- branches->left = NULL;
- freesubre(v, branches->right);
- branches->right = NULL;
+ freesubreandsiblings(v, branches->child);
+ branches->child = NULL;
branches->op = '=';
}
@@ -972,7 +962,7 @@ parseqatom(struct vars *v,
t = subre(v, '(', atom->flags | CAP, lp, rp);
NOERR();
t->subno = subno;
- t->left = atom;
+ t->child = atom;
atom = t;
}
/* postpone everything else pending possible {0} */
@@ -1120,26 +1110,27 @@ parseqatom(struct vars *v,
/* break remaining subRE into x{...} and what follows */
t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
NOERR();
- t->left = atom;
- atomp = &t->left;
+ t->child = atom;
+ atomp = &t->child;
/*
- * Here we should recurse to fill t->right ... but we must postpone that
- * to the end.
+ * Here we should recurse to fill t->child->sibling ... but we must
+ * postpone that to the end. One reason is that t->child may be replaced
+ * below, and we don't want to worry about its sibling link.
*/
/*
- * Convert top node to a concatenation of the prefix (top->left, covering
+ * Convert top node to a concatenation of the prefix (top->child, covering
* whatever we parsed previously) and remaining (t). Note that the prefix
* could be empty, in which case this concatenation node is unnecessary.
* To keep things simple, we operate in a general way for now, and get rid
* of unnecessary subres below.
*/
- assert(top->op == '=' && top->left == NULL && top->right == NULL);
- top->left = subre(v, '=', top->flags, top->begin, lp);
+ assert(top->op == '=' && top->child == NULL);
+ top->child = subre(v, '=', top->flags, top->begin, lp);
NOERR();
top->op = '.';
- top->right = t;
+ top->child->sibling = t;
/* top->flags will get updated later */
/* if it's a backref, now is the time to replicate the subNFA */
@@ -1201,9 +1192,9 @@ parseqatom(struct vars *v,
f = COMBINE(qprefer, atom->flags);
t = subre(v, '.', f, s, atom->end); /* prefix and atom */
NOERR();
- t->left = subre(v, '=', PREF(f), s, atom->begin);
+ t->child = subre(v, '=', PREF(f), s, atom->begin);
NOERR();
- t->right = atom;
+ t->child->sibling = atom;
*atomp = t;
/* rest of branch can be strung starting from atom->end */
s2 = atom->end;
@@ -1222,44 +1213,43 @@ parseqatom(struct vars *v,
NOERR();
t->min = (short) m;
t->max = (short) n;
- t->left = atom;
+ t->child = atom;
*atomp = t;
/* rest of branch is to be strung from iteration's end state */
}
/* and finally, look after that postponed recursion */
- t = top->right;
+ t = top->child->sibling;
if (!(SEE('|') || SEE(stopper) || SEE(EOS)))
{
- /* parse all the rest of the branch, and insert in t->right */
- t->right = parsebranch(v, stopper, type, s2, rp, 1);
+ /* parse all the rest of the branch, and insert in t->child->sibling */
+ t->child->sibling = parsebranch(v, stopper, type, s2, rp, 1);
NOERR();
assert(SEE('|') || SEE(stopper) || SEE(EOS));
/* here's the promised update of the flags */
- t->flags |= COMBINE(t->flags, t->right->flags);
+ t->flags |= COMBINE(t->flags, t->child->sibling->flags);
top->flags |= COMBINE(top->flags, t->flags);
/*
* At this point both top and t are concatenation (op == '.') subres,
- * and we have top->left = prefix of branch, top->right = t, t->left =
- * messy atom (with quantification superstructure if needed), t->right
- * = rest of branch.
+ * and we have top->child = prefix of branch, top->child->sibling = t,
+ * t->child = messy atom (with quantification superstructure if
+ * needed), t->child->sibling = rest of branch.
*
- * If the messy atom was the first thing in the branch, then top->left
- * is vacuous and we can get rid of one level of concatenation. Since
- * the caller is holding a pointer to the top node, we can't remove
- * that node; but we're allowed to change its properties.
+ * If the messy atom was the first thing in the branch, then
+ * top->child is vacuous and we can get rid of one level of
+ * concatenation. Since the caller is holding a pointer to the top
+ * node, we can't remove that node; but we're allowed to change its
+ * properties.
*/
- assert(top->left->op == '=');
- if (top->left->begin == top->left->end)
+ assert(top->child->op == '=');
+ if (top->child->begin == top->child->end)
{
- assert(!MESSY(top->left->flags));
- freesubre(v, top->left);
- top->left = t->left;
- top->right = t->right;
- t->left = t->right = NULL;
- freesubre(v, t);
+ assert(!MESSY(top->child->flags));
+ freesubre(v, top->child);
+ top->child = t->child;
+ freesrnode(v, t);
}
}
else
@@ -1269,34 +1259,31 @@ parseqatom(struct vars *v,
* concatenation node 't'. Just link s2 straight to rp.
*/
EMPTYARC(s2, rp);
- top->right = t->left;
- top->flags |= COMBINE(top->flags, top->right->flags);
- t->left = t->right = NULL;
- freesubre(v, t);
+ top->child->sibling = t->child;
+ top->flags |= COMBINE(top->flags, top->child->sibling->flags);
+ freesrnode(v, t);
/*
- * Again, it could be that top->left is vacuous (if the messy atom was
- * in fact the only thing in the branch). In that case we need no
- * concatenation at all; just replace top with top->right.
+ * Again, it could be that top->child is vacuous (if the messy atom
+ * was in fact the only thing in the branch). In that case we need no
+ * concatenation at all; just replace top with top->child->sibling.
*/
- assert(top->left->op == '=');
- if (top->left->begin == top->left->end)
+ assert(top->child->op == '=');
+ if (top->child->begin == top->child->end)
{
- assert(!MESSY(top->left->flags));
- freesubre(v, top->left);
- t = top->right;
+ assert(!MESSY(top->child->flags));
+ t = top->child->sibling;
+ freesubre(v, top->child);
top->op = t->op;
top->flags = t->flags;
top->id = t->id;
top->subno = t->subno;
top->min = t->min;
top->max = t->max;
- top->left = t->left;
- top->right = t->right;
+ top->child = t->child;
top->begin = t->begin;
top->end = t->end;
- t->left = t->right = NULL;
- freesubre(v, t);
+ freesrnode(v, t);
}
}
}
@@ -1786,7 +1773,7 @@ subre(struct vars *v,
}
if (ret != NULL)
- v->treefree = ret->left;
+ v->treefree = ret->child;
else
{
ret = (struct subre *) MALLOC(sizeof(struct subre));
@@ -1806,8 +1793,8 @@ subre(struct vars *v,
ret->id = 0; /* will be assigned later */
ret->subno = 0;
ret->min = ret->max = 1;
- ret->left = NULL;
- ret->right = NULL;
+ ret->child = NULL;
+ ret->sibling = NULL;
ret->begin = begin;
ret->end = end;
ZAPCNFA(ret->cnfa);
@@ -1817,6 +1804,9 @@ subre(struct vars *v,
/*
* freesubre - free a subRE subtree
+ *
+ * This frees child node(s) of the given subRE too,
+ * but not its siblings.
*/
static void
freesubre(struct vars *v, /* might be NULL */
@@ -1825,15 +1815,32 @@ freesubre(struct vars *v, /* might be NULL */
if (sr == NULL)
return;
- if (sr->left != NULL)
- freesubre(v, sr->left);
- if (sr->right != NULL)
- freesubre(v, sr->right);
+ if (sr->child != NULL)
+ freesubreandsiblings(v, sr->child);
freesrnode(v, sr);
}
/*
+ * freesubreandsiblings - free a subRE subtree
+ *
+ * This frees child node(s) of the given subRE too,
+ * as well as any following siblings.
+ */
+static void
+freesubreandsiblings(struct vars *v, /* might be NULL */
+ struct subre *sr)
+{
+ while (sr != NULL)
+ {
+ struct subre *next = sr->sibling;
+
+ freesubre(v, sr);
+ sr = next;
+ }
+}
+
+/*
* freesrnode - free one node in a subRE subtree
*/
static void
@@ -1850,7 +1857,7 @@ freesrnode(struct vars *v, /* might be NULL */
if (v != NULL && v->treechain != NULL)
{
/* we're still parsing, maybe we can reuse the subre */
- sr->left = v->treefree;
+ sr->child = v->treefree;
v->treefree = sr;
}
else
@@ -1881,15 +1888,14 @@ numst(struct subre *t,
int start) /* starting point for subtree numbers */
{
int i;
+ struct subre *t2;
assert(t != NULL);
i = start;
t->id = (short) i++;
- if (t->left != NULL)
- i = numst(t->left, i);
- if (t->right != NULL)
- i = numst(t->right, i);
+ for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
+ i = numst(t2, i);
return i;
}
@@ -1913,13 +1919,13 @@ numst(struct subre *t,
static void
markst(struct subre *t)
{
+ struct subre *t2;
+
assert(t != NULL);
t->flags |= INUSE;
- if (t->left != NULL)
- markst(t->left);
- if (t->right != NULL)
- markst(t->right);
+ for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
+ markst(t2);
}
/*
@@ -1949,12 +1955,12 @@ nfatree(struct vars *v,
struct subre *t,
FILE *f) /* for debug output */
{
+ struct subre *t2;
+
assert(t != NULL && t->begin != NULL);
- if (t->left != NULL)
- (DISCARD) nfatree(v, t->left, f);
- if (t->right != NULL)
- (DISCARD) nfatree(v, t->right, f);
+ for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
+ (DISCARD) nfatree(v, t2, f);
return nfanode(v, t, 0, f);
}
@@ -2206,6 +2212,7 @@ stdump(struct subre *t,
int nfapresent) /* is the original NFA still around? */
{
char idbuf[50];
+ struct subre *t2;
fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
if (t->flags & LONGER)
@@ -2231,20 +2238,21 @@ stdump(struct subre *t,
}
if (nfapresent)
fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no);
- if (t->left != NULL)
- fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
- if (t->right != NULL)
- fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
+ if (t->child != NULL)
+ fprintf(f, " C:%s", stid(t->child, idbuf, sizeof(idbuf)));
+ /* printing second child isn't necessary, but it is often helpful */
+ if (t->child != NULL && t->child->sibling != NULL)
+ fprintf(f, " C2:%s", stid(t->child->sibling, idbuf, sizeof(idbuf)));
+ if (t->sibling != NULL)
+ fprintf(f, " S:%s", stid(t->sibling, idbuf, sizeof(idbuf)));
if (!NULLCNFA(t->cnfa))
{
fprintf(f, "\n");
dumpcnfa(&t->cnfa, f);
}
fprintf(f, "\n");
- if (t->left != NULL)
- stdump(t->left, f, nfapresent);
- if (t->right != NULL)
- stdump(t->right, f, nfapresent);
+ for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
+ stdump(t2, f, nfapresent);
}
/*