aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/regex/regc_nfa.c13
-rw-r--r--src/backend/regex/rege_dfa.c145
-rw-r--r--src/backend/regex/regexec.c27
3 files changed, 154 insertions, 31 deletions
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 27998d688a8..e474e48c28c 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -180,7 +180,7 @@ newstate(struct nfa * nfa)
/*
* This is a handy place to check for operation cancel during regex
* compilation, since no code path will go very long without making a new
- * state.
+ * state or arc.
*/
if (CANCEL_REQUESTED(nfa->v->re))
{
@@ -333,6 +333,17 @@ newarc(struct nfa * nfa,
assert(from != NULL && to != NULL);
+ /*
+ * This is a handy place to check for operation cancel during regex
+ * compilation, since no code path will go very long without making a new
+ * state or arc.
+ */
+ if (CANCEL_REQUESTED(nfa->v->re))
+ {
+ NERR(REG_CANCEL);
+ return;
+ }
+
/* check for duplicates */
for (a = from->outs; a != NULL; a = a->outchain)
if (a->to == to && a->co == co && a->type == t)
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c
index d367a77e854..a9305dd7ccd 100644
--- a/src/backend/regex/rege_dfa.c
+++ b/src/backend/regex/rege_dfa.c
@@ -34,9 +34,12 @@
/*
* longest - longest-preferred matching engine
+ *
+ * On success, returns match endpoint address. Returns NULL on no match.
+ * Internal errors also return NULL, with v->err set.
*/
-static chr * /* endpoint, or NULL */
-longest(struct vars * v, /* used only for debug and exec flags */
+static chr *
+longest(struct vars * v,
struct dfa * d,
chr *start, /* where the match should start */
chr *stop, /* match must end at or before here */
@@ -51,11 +54,15 @@ longest(struct vars * v, /* used only for debug and exec flags */
int i;
struct colormap *cm = d->cm;
+ /* prevent "uninitialized variable" warnings */
+ if (hitstopp != NULL)
+ *hitstopp = 0;
+
/* initialize */
css = initialize(v, d, start);
+ if (css == NULL)
+ return NULL;
cp = start;
- if (hitstopp != NULL)
- *hitstopp = 0;
/* startup */
FDEBUG(("+++ startup +++\n"));
@@ -74,8 +81,14 @@ longest(struct vars * v, /* used only for debug and exec flags */
return NULL;
css->lastseen = cp;
- /* main loop */
+ /*
+ * This is the main text-scanning loop. It seems worth having two copies
+ * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
+ * builds, when you're not actively tracing.
+ */
+#ifdef REG_DEBUG
if (v->eflags & REG_FTRACE)
+ {
while (cp < realstop)
{
FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
@@ -92,7 +105,10 @@ longest(struct vars * v, /* used only for debug and exec flags */
ss->lastseen = cp;
css = ss;
}
+ }
else
+#endif
+ {
while (cp < realstop)
{
co = GETCOLOR(cm, *cp);
@@ -107,6 +123,10 @@ longest(struct vars * v, /* used only for debug and exec flags */
ss->lastseen = cp;
css = ss;
}
+ }
+
+ if (ISERR())
+ return NULL;
/* shutdown */
FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
@@ -117,6 +137,8 @@ longest(struct vars * v, /* used only for debug and exec flags */
co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
FDEBUG(("color %ld\n", (long) co));
ss = miss(v, d, css, co, cp, start);
+ if (ISERR())
+ return NULL;
/* special case: match ended at eol? */
if (ss != NULL && (ss->flags & POSTSTATE))
return cp;
@@ -138,14 +160,17 @@ longest(struct vars * v, /* used only for debug and exec flags */
/*
* shortest - shortest-preferred matching engine
+ *
+ * On success, returns match endpoint address. Returns NULL on no match.
+ * Internal errors also return NULL, with v->err set.
*/
-static chr * /* endpoint, or NULL */
+static chr *
shortest(struct vars * v,
struct dfa * d,
chr *start, /* where the match should start */
chr *min, /* match must end at or after here */
chr *max, /* match must end at or before here */
- chr **coldp, /* store coldstart pointer here, if nonNULL */
+ chr **coldp, /* store coldstart pointer here, if non-NULL */
int *hitstopp) /* record whether hit v->stop, if non-NULL */
{
chr *cp;
@@ -156,11 +181,17 @@ shortest(struct vars * v,
struct sset *ss;
struct colormap *cm = d->cm;
+ /* prevent "uninitialized variable" warnings */
+ if (coldp != NULL)
+ *coldp = NULL;
+ if (hitstopp != NULL)
+ *hitstopp = 0;
+
/* initialize */
css = initialize(v, d, start);
+ if (css == NULL)
+ return NULL;
cp = start;
- if (hitstopp != NULL)
- *hitstopp = 0;
/* startup */
FDEBUG(("--- startup ---\n"));
@@ -180,8 +211,14 @@ shortest(struct vars * v,
css->lastseen = cp;
ss = css;
- /* main loop */
+ /*
+ * This is the main text-scanning loop. It seems worth having two copies
+ * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
+ * builds, when you're not actively tracing.
+ */
+#ifdef REG_DEBUG
if (v->eflags & REG_FTRACE)
+ {
while (cp < realmax)
{
FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
@@ -200,7 +237,10 @@ shortest(struct vars * v,
if ((ss->flags & POSTSTATE) && cp >= realmin)
break; /* NOTE BREAK OUT */
}
+ }
else
+#endif
+ {
while (cp < realmax)
{
co = GETCOLOR(cm, *cp);
@@ -217,6 +257,7 @@ shortest(struct vars * v,
if ((ss->flags & POSTSTATE) && cp >= realmin)
break; /* NOTE BREAK OUT */
}
+ }
if (ss == NULL)
return NULL;
@@ -389,7 +430,7 @@ hash(unsigned *uv,
* initialize - hand-craft a cache entry for startup, otherwise get ready
*/
static struct sset *
-initialize(struct vars * v, /* used only for debug flags */
+initialize(struct vars * v,
struct dfa * d,
chr *start)
{
@@ -402,6 +443,8 @@ initialize(struct vars * v, /* used only for debug flags */
else
{ /* no, must (re)build it */
ss = getvacant(v, d, start, start);
+ if (ss == NULL)
+ return NULL;
for (i = 0; i < d->wordsper; i++)
ss->states[i] = 0;
BSET(ss->states, d->cnfa->pre);
@@ -420,10 +463,20 @@ initialize(struct vars * v, /* used only for debug flags */
}
/*
- * miss - handle a cache miss
+ * miss - handle a stateset cache miss
+ *
+ * css is the current stateset, co is the color of the current input character,
+ * cp points to the character after that (which is where we may need to test
+ * LACONs). start does not affect matching behavior but is needed for pickss'
+ * heuristics about which stateset cache entry to replace.
+ *
+ * Ordinarily, returns the address of the next stateset (the one that is
+ * valid after consuming the input character). Returns NULL if no valid
+ * NFA states remain, ie we have a certain match failure.
+ * Internal errors also return NULL, with v->err set.
*/
-static struct sset * /* NULL if goes to empty set */
-miss(struct vars * v, /* used only for debug flags */
+static struct sset *
+miss(struct vars * v,
struct dfa * d,
struct sset * css,
pcolor co,
@@ -449,9 +502,23 @@ miss(struct vars * v, /* used only for debug flags */
}
FDEBUG(("miss\n"));
- /* first, what set of states would we end up in? */
+ /*
+ * Checking for operation cancel in the inner text search loop seems
+ * unduly expensive. As a compromise, check during cache misses.
+ */
+ if (CANCEL_REQUESTED(v->re))
+ {
+ ERR(REG_CANCEL);
+ return NULL;
+ }
+
+ /*
+ * What set of states would we end up in after consuming the co character?
+ * We first consider PLAIN arcs that consume the character, and then look
+ * to see what LACON arcs could be traversed after consuming it.
+ */
for (i = 0; i < d->wordsper; i++)
- d->work[i] = 0;
+ d->work[i] = 0; /* build new stateset bitmap in d->work */
ispost = 0;
noprogress = 1;
gotstate = 0;
@@ -468,22 +535,31 @@ miss(struct vars * v, /* used only for debug flags */
noprogress = 0;
FDEBUG(("%d -> %d\n", i, ca->to));
}
- dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0;
+ if (!gotstate)
+ return NULL; /* character cannot reach any new state */
+ dolacons = (cnfa->flags & HASLACONS);
sawlacons = 0;
+ /* outer loop handles transitive closure of reachable-by-LACON states */
while (dolacons)
- { /* transitive closure */
+ {
dolacons = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(d->work, i))
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
{
if (ca->co < cnfa->ncolors)
- continue; /* NOTE CONTINUE */
- sawlacons = 1;
+ continue; /* not a LACON arc */
if (ISBSET(d->work, ca->to))
- continue; /* NOTE CONTINUE */
+ continue; /* arc would be a no-op anyway */
+ sawlacons = 1; /* this LACON affects our result */
if (!lacon(v, cnfa, cp, ca->co))
- continue; /* NOTE CONTINUE */
+ {
+ if (ISERR())
+ return NULL;
+ continue; /* LACON arc cannot be traversed */
+ }
+ if (ISERR())
+ return NULL;
BSET(d->work, ca->to);
dolacons = 1;
if (ca->to == cnfa->post)
@@ -493,11 +569,9 @@ miss(struct vars * v, /* used only for debug flags */
FDEBUG(("%d :> %d\n", i, ca->to));
}
}
- if (!gotstate)
- return NULL;
h = HASH(d->work, d->wordsper);
- /* next, is that in the cache? */
+ /* Is this stateset already in the cache? */
for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
if (HIT(h, d->work, p, d->wordsper))
{
@@ -507,6 +581,8 @@ miss(struct vars * v, /* used only for debug flags */
if (i == 0)
{ /* nope, need a new cache entry */
p = getvacant(v, d, cp, start);
+ if (p == NULL)
+ return NULL;
assert(p != css);
for (i = 0; i < d->wordsper; i++)
p->states[i] = d->work[i];
@@ -517,8 +593,15 @@ miss(struct vars * v, /* used only for debug flags */
/* lastseen to be dealt with by caller */
}
+ /*
+ * Link new stateset to old, unless a LACON affected the result, in which
+ * case we don't create the link. That forces future transitions across
+ * this same arc (same prior stateset and character color) to come through
+ * miss() again, so that we can recheck the LACON(s), which might or might
+ * not pass since context will be different.
+ */
if (!sawlacons)
- { /* lookahead conds. always cache miss */
+ {
FDEBUG(("c%d[%d]->c%d\n",
(int) (css - d->ssets), co, (int) (p - d->ssets)));
css->outs[co] = p;
@@ -562,11 +645,12 @@ lacon(struct vars * v,
/*
* getvacant - get a vacant state set
+ *
* This routine clears out the inarcs and outarcs, but does not otherwise
* clear the innards of the state set -- that's up to the caller.
*/
static struct sset *
-getvacant(struct vars * v, /* used only for debug flags */
+getvacant(struct vars * v,
struct dfa * d,
chr *cp,
chr *start)
@@ -578,6 +662,8 @@ getvacant(struct vars * v, /* used only for debug flags */
color co;
ss = pickss(v, d, cp, start);
+ if (ss == NULL)
+ return NULL;
assert(!(ss->flags & LOCKED));
/* clear out its inarcs, including self-referential ones */
@@ -635,7 +721,7 @@ getvacant(struct vars * v, /* used only for debug flags */
* pickss - pick the next stateset to be used
*/
static struct sset *
-pickss(struct vars * v, /* used only for debug flags */
+pickss(struct vars * v,
struct dfa * d,
chr *cp,
chr *start)
@@ -691,7 +777,6 @@ pickss(struct vars * v, /* used only for debug flags */
/* nobody's old enough?!? -- something's really wrong */
FDEBUG(("cannot find victim to replace!\n"));
- assert(NOTREACHED);
ERR(REG_ASSERT);
- return d->ssets;
+ return NULL;
}
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index b4a3dc3ab40..bed6098bf09 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -450,6 +450,11 @@ cfindloop(struct vars * v,
{
MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
+ if (ISERR())
+ {
+ *coldp = cold;
+ return v->err;
+ }
if (close == NULL)
break; /* NOTE BREAK */
assert(cold != NULL);
@@ -469,6 +474,11 @@ cfindloop(struct vars * v,
else
end = longest(v, d, begin, estop,
&hitend);
+ if (ISERR())
+ {
+ *coldp = cold;
+ return v->err;
+ }
if (hitend && cold == NULL)
cold = begin;
if (end == NULL)
@@ -681,6 +691,7 @@ ccondissect(struct vars * v,
/* pick a tentative midpoint */
mid = longest(v, d, begin, end, (int *) NULL);
+ NOERR();
if (mid == NULL)
return REG_NOMATCH;
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
@@ -705,6 +716,7 @@ ccondissect(struct vars * v,
if (er != REG_NOMATCH)
return er;
}
+ NOERR();
/* that midpoint didn't work, find a new one */
if (mid == begin)
@@ -714,6 +726,7 @@ ccondissect(struct vars * v,
return REG_NOMATCH;
}
mid = longest(v, d, begin, mid - 1, (int *) NULL);
+ NOERR();
if (mid == NULL)
{
/* failed to find a new one */
@@ -756,6 +769,7 @@ crevcondissect(struct vars * v,
/* pick a tentative midpoint */
mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
+ NOERR();
if (mid == NULL)
return REG_NOMATCH;
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
@@ -780,6 +794,7 @@ crevcondissect(struct vars * v,
if (er != REG_NOMATCH)
return er;
}
+ NOERR();
/* that midpoint didn't work, find a new one */
if (mid == end)
@@ -789,6 +804,7 @@ crevcondissect(struct vars * v,
return REG_NOMATCH;
}
mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
+ NOERR();
if (mid == NULL)
{
/* failed to find a new one */
@@ -914,6 +930,7 @@ caltdissect(struct vars * v,
if (er != REG_NOMATCH)
return er;
}
+ NOERR();
t = t->right;
}
@@ -1005,6 +1022,11 @@ citerdissect(struct vars * v,
{
/* try to find an endpoint for the k'th sub-match */
endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
+ if (ISERR())
+ {
+ FREE(endpts);
+ return v->err;
+ }
if (endpts[k] == NULL)
{
/* no match possible, so see if we can shorten previous one */
@@ -1201,6 +1223,11 @@ creviterdissect(struct vars * v,
/* try to find an endpoint for the k'th sub-match */
endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
(chr **) NULL, (int *) NULL);
+ if (ISERR())
+ {
+ FREE(endpts);
+ return v->err;
+ }
if (endpts[k] == NULL)
{
/* no match possible, so see if we can lengthen previous one */