aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/regex/regc_nfa.c150
-rw-r--r--src/backend/regex/regcomp.c4
2 files changed, 119 insertions, 35 deletions
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 4c03ff14037..ecc91a8ede9 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -1513,6 +1513,7 @@ pullback(struct nfa * nfa,
struct state *nexts;
struct arc *a;
struct arc *nexta;
+ struct state *intermediates;
int progress;
/* find and pull until there are no more */
@@ -1522,14 +1523,25 @@ pullback(struct nfa * nfa,
for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
{
nexts = s->next;
+ intermediates = NULL;
for (a = s->outs; a != NULL && !NISERR(); a = nexta)
{
nexta = a->outchain;
if (a->type == '^' || a->type == BEHIND)
- if (pull(nfa, a))
+ if (pull(nfa, a, &intermediates))
progress = 1;
- assert(nexta == NULL || s->no != FREESTATE);
}
+ /* clear tmp fields of intermediate states created here */
+ while (intermediates != NULL)
+ {
+ struct state *ns = intermediates->tmp;
+
+ intermediates->tmp = NULL;
+ intermediates = ns;
+ }
+ /* if s is now useless, get rid of it */
+ if ((s->nins == 0 || s->nouts == 0) && !s->flag)
+ dropstate(nfa, s);
}
if (progress && f != NULL)
dumpnfa(nfa, f);
@@ -1557,13 +1569,26 @@ pullback(struct nfa * nfa,
/*
* pull - pull a back constraint backward past its source state
- * A significant property of this function is that it deletes at most
- * one state -- the constraint's from state -- and only if the constraint
- * was that state's last outarc.
+ *
+ * Returns 1 if successful (which it always is unless the source is the
+ * start state or we have an internal error), 0 if nothing happened.
+ *
+ * A significant property of this function is that it deletes no pre-existing
+ * states, and no outarcs of the constraint's from state other than the given
+ * constraint arc. This makes the loops in pullback() safe, at the cost that
+ * we may leave useless states behind. Therefore, we leave it to pullback()
+ * to delete such states.
+ *
+ * If the from state has multiple back-constraint outarcs, and/or multiple
+ * compatible constraint inarcs, we only need to create one new intermediate
+ * state per combination of predecessor and successor states. *intermediates
+ * points to a list of such intermediate states for this from state (chained
+ * through their tmp fields).
*/
-static int /* 0 couldn't, 1 could */
+static int
pull(struct nfa * nfa,
- struct arc * con)
+ struct arc * con,
+ struct state ** intermediates)
{
struct state *from = con->from;
struct state *to = con->to;
@@ -1580,7 +1605,11 @@ pull(struct nfa * nfa,
return 1;
}
- /* first, clone from state if necessary to avoid other outarcs */
+ /*
+ * First, clone from state if necessary to avoid other outarcs. This may
+ * seem wasteful, but it simplifies the logic, and we'll get rid of the
+ * clone state again at the bottom.
+ */
if (from->nouts > 1)
{
s = newstate(nfa);
@@ -1589,13 +1618,15 @@ pull(struct nfa * nfa,
copyins(nfa, from, s, 1); /* duplicate inarcs */
cparc(nfa, con, s, to); /* move constraint arc */
freearc(nfa, con);
+ if (NISERR())
+ return 0;
from = s;
con = from->outs;
}
assert(from->nouts == 1);
/* propagate the constraint into the from state's inarcs */
- for (a = from->ins; a != NULL; a = nexta)
+ for (a = from->ins; a != NULL && !NISERR(); a = nexta)
{
nexta = a->inchain;
switch (combine(con, a))
@@ -1606,13 +1637,23 @@ pull(struct nfa * nfa,
case SATISFIED: /* no action needed */
break;
case COMPATIBLE: /* swap the two arcs, more or less */
- s = newstate(nfa);
- if (NISERR())
- return 0;
- cparc(nfa, a, s, to); /* anticipate move */
+ /* need an intermediate state, but might have one already */
+ for (s = *intermediates; s != NULL; s = s->tmp)
+ {
+ assert(s->nins > 0 && s->nouts > 0);
+ if (s->ins->from == a->from && s->outs->to == to)
+ break;
+ }
+ if (s == NULL)
+ {
+ s = newstate(nfa);
+ if (NISERR())
+ return 0;
+ s->tmp = *intermediates;
+ *intermediates = s;
+ }
cparc(nfa, con, a->from, s);
- if (NISERR())
- return 0;
+ cparc(nfa, a, s, to);
freearc(nfa, a);
break;
default:
@@ -1623,7 +1664,8 @@ pull(struct nfa * nfa,
/* remaining inarcs, if any, incorporate the constraint */
moveins(nfa, from, to);
- dropstate(nfa, from); /* will free the constraint */
+ freearc(nfa, con);
+ /* from state is now useless, but we leave it to pullback() to clean up */
return 1;
}
@@ -1638,6 +1680,7 @@ pushfwd(struct nfa * nfa,
struct state *nexts;
struct arc *a;
struct arc *nexta;
+ struct state *intermediates;
int progress;
/* find and push until there are no more */
@@ -1647,14 +1690,25 @@ pushfwd(struct nfa * nfa,
for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
{
nexts = s->next;
+ intermediates = NULL;
for (a = s->ins; a != NULL && !NISERR(); a = nexta)
{
nexta = a->inchain;
if (a->type == '$' || a->type == AHEAD)
- if (push(nfa, a))
+ if (push(nfa, a, &intermediates))
progress = 1;
- assert(nexta == NULL || s->no != FREESTATE);
}
+ /* clear tmp fields of intermediate states created here */
+ while (intermediates != NULL)
+ {
+ struct state *ns = intermediates->tmp;
+
+ intermediates->tmp = NULL;
+ intermediates = ns;
+ }
+ /* if s is now useless, get rid of it */
+ if ((s->nins == 0 || s->nouts == 0) && !s->flag)
+ dropstate(nfa, s);
}
if (progress && f != NULL)
dumpnfa(nfa, f);
@@ -1682,13 +1736,26 @@ pushfwd(struct nfa * nfa,
/*
* push - push a forward constraint forward past its destination state
- * A significant property of this function is that it deletes at most
- * one state -- the constraint's to state -- and only if the constraint
- * was that state's last inarc.
+ *
+ * Returns 1 if successful (which it always is unless the destination is the
+ * post state or we have an internal error), 0 if nothing happened.
+ *
+ * A significant property of this function is that it deletes no pre-existing
+ * states, and no inarcs of the constraint's to state other than the given
+ * constraint arc. This makes the loops in pushfwd() safe, at the cost that
+ * we may leave useless states behind. Therefore, we leave it to pushfwd()
+ * to delete such states.
+ *
+ * If the to state has multiple forward-constraint inarcs, and/or multiple
+ * compatible constraint outarcs, we only need to create one new intermediate
+ * state per combination of predecessor and successor states. *intermediates
+ * points to a list of such intermediate states for this to state (chained
+ * through their tmp fields).
*/
-static int /* 0 couldn't, 1 could */
+static int
push(struct nfa * nfa,
- struct arc * con)
+ struct arc * con,
+ struct state ** intermediates)
{
struct state *from = con->from;
struct state *to = con->to;
@@ -1705,22 +1772,28 @@ push(struct nfa * nfa,
return 1;
}
- /* first, clone to state if necessary to avoid other inarcs */
+ /*
+ * First, clone to state if necessary to avoid other inarcs. This may
+ * seem wasteful, but it simplifies the logic, and we'll get rid of the
+ * clone state again at the bottom.
+ */
if (to->nins > 1)
{
s = newstate(nfa);
if (NISERR())
return 0;
copyouts(nfa, to, s, 1); /* duplicate outarcs */
- cparc(nfa, con, from, s); /* move constraint */
+ cparc(nfa, con, from, s); /* move constraint arc */
freearc(nfa, con);
+ if (NISERR())
+ return 0;
to = s;
con = to->ins;
}
assert(to->nins == 1);
/* propagate the constraint into the to state's outarcs */
- for (a = to->outs; a != NULL; a = nexta)
+ for (a = to->outs; a != NULL && !NISERR(); a = nexta)
{
nexta = a->outchain;
switch (combine(con, a))
@@ -1731,13 +1804,23 @@ push(struct nfa * nfa,
case SATISFIED: /* no action needed */
break;
case COMPATIBLE: /* swap the two arcs, more or less */
- s = newstate(nfa);
- if (NISERR())
- return 0;
- cparc(nfa, con, s, a->to); /* anticipate move */
+ /* need an intermediate state, but might have one already */
+ for (s = *intermediates; s != NULL; s = s->tmp)
+ {
+ assert(s->nins > 0 && s->nouts > 0);
+ if (s->ins->from == from && s->outs->to == a->to)
+ break;
+ }
+ if (s == NULL)
+ {
+ s = newstate(nfa);
+ if (NISERR())
+ return 0;
+ s->tmp = *intermediates;
+ *intermediates = s;
+ }
+ cparc(nfa, con, s, a->to);
cparc(nfa, a, from, s);
- if (NISERR())
- return 0;
freearc(nfa, a);
break;
default:
@@ -1748,7 +1831,8 @@ push(struct nfa * nfa,
/* remaining outarcs, if any, incorporate the constraint */
moveouts(nfa, to, from);
- dropstate(nfa, to); /* will free the constraint */
+ freearc(nfa, con);
+ /* to state is now useless, but we leave it to pushfwd() to clean up */
return 1;
}
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index ac73b42e651..60ee0f2880e 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -149,9 +149,9 @@ static void cleartraverse(struct nfa *, struct state *);
static void specialcolors(struct nfa *);
static long optimize(struct nfa *, FILE *);
static void pullback(struct nfa *, FILE *);
-static int pull(struct nfa *, struct arc *);
+static int pull(struct nfa *, struct arc *, struct state **);
static void pushfwd(struct nfa *, FILE *);
-static int push(struct nfa *, struct arc *);
+static int push(struct nfa *, struct arc *, struct state **);
#define INCOMPATIBLE 1 /* destroys arc */
#define SATISFIED 2 /* constraint satisfied */