diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/regex/regc_nfa.c | 13 | ||||
-rw-r--r-- | src/backend/regex/rege_dfa.c | 145 | ||||
-rw-r--r-- | src/backend/regex/regexec.c | 37 |
3 files changed, 158 insertions, 37 deletions
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index e0d5af205f6..e834274dacb 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -180,7 +180,7 @@ newstate(struct nfa * nfa) /* * This is a handy place to check for operation cancel during regex * compilation, since no code path will go very long without making a new - * state. + * state or arc. */ if (CANCEL_REQUESTED(nfa->v->re)) { @@ -333,6 +333,17 @@ newarc(struct nfa * nfa, assert(from != NULL && to != NULL); + /* + * This is a handy place to check for operation cancel during regex + * compilation, since no code path will go very long without making a new + * state or arc. + */ + if (CANCEL_REQUESTED(nfa->v->re)) + { + NERR(REG_CANCEL); + return; + } + /* check for duplicates */ for (a = from->outs; a != NULL; a = a->outchain) if (a->to == to && a->co == co && a->type == t) diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index f51a2472761..15ef1d8ef9b 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -34,9 +34,12 @@ /* * longest - longest-preferred matching engine + * + * On success, returns match endpoint address. Returns NULL on no match. + * Internal errors also return NULL, with v->err set. */ -static chr * /* endpoint, or NULL */ -longest(struct vars * v, /* used only for debug and exec flags */ +static chr * +longest(struct vars * v, struct dfa * d, chr *start, /* where the match should start */ chr *stop, /* match must end at or before here */ @@ -51,11 +54,15 @@ longest(struct vars * v, /* used only for debug and exec flags */ int i; struct colormap *cm = d->cm; + /* prevent "uninitialized variable" warnings */ + if (hitstopp != NULL) + *hitstopp = 0; + /* initialize */ css = initialize(v, d, start); + if (css == NULL) + return NULL; cp = start; - if (hitstopp != NULL) - *hitstopp = 0; /* startup */ FDEBUG(("+++ startup +++\n")); @@ -74,8 +81,14 @@ longest(struct vars * v, /* used only for debug and exec flags */ return NULL; css->lastseen = cp; - /* main loop */ + /* + * This is the main text-scanning loop. It seems worth having two copies + * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG + * builds, when you're not actively tracing. + */ +#ifdef REG_DEBUG if (v->eflags & REG_FTRACE) + { while (cp < realstop) { FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets))); @@ -92,7 +105,10 @@ longest(struct vars * v, /* used only for debug and exec flags */ ss->lastseen = cp; css = ss; } + } else +#endif + { while (cp < realstop) { co = GETCOLOR(cm, *cp); @@ -107,6 +123,10 @@ longest(struct vars * v, /* used only for debug and exec flags */ ss->lastseen = cp; css = ss; } + } + + if (ISERR()) + return NULL; /* shutdown */ FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets))); @@ -117,6 +137,8 @@ longest(struct vars * v, /* used only for debug and exec flags */ co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; FDEBUG(("color %ld\n", (long) co)); ss = miss(v, d, css, co, cp, start); + if (ISERR()) + return NULL; /* special case: match ended at eol? */ if (ss != NULL && (ss->flags & POSTSTATE)) return cp; @@ -138,14 +160,17 @@ longest(struct vars * v, /* used only for debug and exec flags */ /* * shortest - shortest-preferred matching engine + * + * On success, returns match endpoint address. Returns NULL on no match. + * Internal errors also return NULL, with v->err set. */ -static chr * /* endpoint, or NULL */ +static chr * shortest(struct vars * v, struct dfa * d, chr *start, /* where the match should start */ chr *min, /* match must end at or after here */ chr *max, /* match must end at or before here */ - chr **coldp, /* store coldstart pointer here, if nonNULL */ + chr **coldp, /* store coldstart pointer here, if non-NULL */ int *hitstopp) /* record whether hit v->stop, if non-NULL */ { chr *cp; @@ -156,11 +181,17 @@ shortest(struct vars * v, struct sset *ss; struct colormap *cm = d->cm; + /* prevent "uninitialized variable" warnings */ + if (coldp != NULL) + *coldp = NULL; + if (hitstopp != NULL) + *hitstopp = 0; + /* initialize */ css = initialize(v, d, start); + if (css == NULL) + return NULL; cp = start; - if (hitstopp != NULL) - *hitstopp = 0; /* startup */ FDEBUG(("--- startup ---\n")); @@ -180,8 +211,14 @@ shortest(struct vars * v, css->lastseen = cp; ss = css; - /* main loop */ + /* + * This is the main text-scanning loop. It seems worth having two copies + * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG + * builds, when you're not actively tracing. + */ +#ifdef REG_DEBUG if (v->eflags & REG_FTRACE) + { while (cp < realmax) { FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets))); @@ -200,7 +237,10 @@ shortest(struct vars * v, if ((ss->flags & POSTSTATE) && cp >= realmin) break; /* NOTE BREAK OUT */ } + } else +#endif + { while (cp < realmax) { co = GETCOLOR(cm, *cp); @@ -217,6 +257,7 @@ shortest(struct vars * v, if ((ss->flags & POSTSTATE) && cp >= realmin) break; /* NOTE BREAK OUT */ } + } if (ss == NULL) return NULL; @@ -390,7 +431,7 @@ hash(unsigned *uv, * initialize - hand-craft a cache entry for startup, otherwise get ready */ static struct sset * -initialize(struct vars * v, /* used only for debug flags */ +initialize(struct vars * v, struct dfa * d, chr *start) { @@ -403,6 +444,8 @@ initialize(struct vars * v, /* used only for debug flags */ else { /* no, must (re)build it */ ss = getvacant(v, d, start, start); + if (ss == NULL) + return NULL; for (i = 0; i < d->wordsper; i++) ss->states[i] = 0; BSET(ss->states, d->cnfa->pre); @@ -421,10 +464,20 @@ initialize(struct vars * v, /* used only for debug flags */ } /* - * miss - handle a cache miss + * miss - handle a stateset cache miss + * + * css is the current stateset, co is the color of the current input character, + * cp points to the character after that (which is where we may need to test + * LACONs). start does not affect matching behavior but is needed for pickss' + * heuristics about which stateset cache entry to replace. + * + * Ordinarily, returns the address of the next stateset (the one that is + * valid after consuming the input character). Returns NULL if no valid + * NFA states remain, ie we have a certain match failure. + * Internal errors also return NULL, with v->err set. */ -static struct sset * /* NULL if goes to empty set */ -miss(struct vars * v, /* used only for debug flags */ +static struct sset * +miss(struct vars * v, struct dfa * d, struct sset * css, pcolor co, @@ -450,9 +503,23 @@ miss(struct vars * v, /* used only for debug flags */ } FDEBUG(("miss\n")); - /* first, what set of states would we end up in? */ + /* + * Checking for operation cancel in the inner text search loop seems + * unduly expensive. As a compromise, check during cache misses. + */ + if (CANCEL_REQUESTED(v->re)) + { + ERR(REG_CANCEL); + return NULL; + } + + /* + * What set of states would we end up in after consuming the co character? + * We first consider PLAIN arcs that consume the character, and then look + * to see what LACON arcs could be traversed after consuming it. + */ for (i = 0; i < d->wordsper; i++) - d->work[i] = 0; + d->work[i] = 0; /* build new stateset bitmap in d->work */ ispost = 0; noprogress = 1; gotstate = 0; @@ -469,22 +536,31 @@ miss(struct vars * v, /* used only for debug flags */ noprogress = 0; FDEBUG(("%d -> %d\n", i, ca->to)); } - dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0; + if (!gotstate) + return NULL; /* character cannot reach any new state */ + dolacons = (cnfa->flags & HASLACONS); sawlacons = 0; + /* outer loop handles transitive closure of reachable-by-LACON states */ while (dolacons) - { /* transitive closure */ + { dolacons = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(d->work, i)) for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) { if (ca->co < cnfa->ncolors) - continue; /* NOTE CONTINUE */ - sawlacons = 1; + continue; /* not a LACON arc */ if (ISBSET(d->work, ca->to)) - continue; /* NOTE CONTINUE */ + continue; /* arc would be a no-op anyway */ + sawlacons = 1; /* this LACON affects our result */ if (!lacon(v, cnfa, cp, ca->co)) - continue; /* NOTE CONTINUE */ + { + if (ISERR()) + return NULL; + continue; /* LACON arc cannot be traversed */ + } + if (ISERR()) + return NULL; BSET(d->work, ca->to); dolacons = 1; if (ca->to == cnfa->post) @@ -494,11 +570,9 @@ miss(struct vars * v, /* used only for debug flags */ FDEBUG(("%d :> %d\n", i, ca->to)); } } - if (!gotstate) - return NULL; h = HASH(d->work, d->wordsper); - /* next, is that in the cache? */ + /* Is this stateset already in the cache? */ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) if (HIT(h, d->work, p, d->wordsper)) { @@ -508,6 +582,8 @@ miss(struct vars * v, /* used only for debug flags */ if (i == 0) { /* nope, need a new cache entry */ p = getvacant(v, d, cp, start); + if (p == NULL) + return NULL; assert(p != css); for (i = 0; i < d->wordsper; i++) p->states[i] = d->work[i]; @@ -518,8 +594,15 @@ miss(struct vars * v, /* used only for debug flags */ /* lastseen to be dealt with by caller */ } + /* + * Link new stateset to old, unless a LACON affected the result, in which + * case we don't create the link. That forces future transitions across + * this same arc (same prior stateset and character color) to come through + * miss() again, so that we can recheck the LACON(s), which might or might + * not pass since context will be different. + */ if (!sawlacons) - { /* lookahead conds. always cache miss */ + { FDEBUG(("c%d[%d]->c%d\n", (int) (css - d->ssets), co, (int) (p - d->ssets))); css->outs[co] = p; @@ -563,11 +646,12 @@ lacon(struct vars * v, /* * getvacant - get a vacant state set + * * This routine clears out the inarcs and outarcs, but does not otherwise * clear the innards of the state set -- that's up to the caller. */ static struct sset * -getvacant(struct vars * v, /* used only for debug flags */ +getvacant(struct vars * v, struct dfa * d, chr *cp, chr *start) @@ -579,6 +663,8 @@ getvacant(struct vars * v, /* used only for debug flags */ color co; ss = pickss(v, d, cp, start); + if (ss == NULL) + return NULL; assert(!(ss->flags & LOCKED)); /* clear out its inarcs, including self-referential ones */ @@ -636,7 +722,7 @@ getvacant(struct vars * v, /* used only for debug flags */ * pickss - pick the next stateset to be used */ static struct sset * -pickss(struct vars * v, /* used only for debug flags */ +pickss(struct vars * v, struct dfa * d, chr *cp, chr *start) @@ -692,7 +778,6 @@ pickss(struct vars * v, /* used only for debug flags */ /* nobody's old enough?!? -- something's really wrong */ FDEBUG(("cannot find victim to replace!\n")); - assert(NOTREACHED); ERR(REG_ASSERT); - return d->ssets; + return NULL; } diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 7b98b18da7e..ee16a5ef412 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -428,6 +428,11 @@ cfindloop(struct vars * v, { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); + if (ISERR()) + { + *coldp = cold; + return v->err; + } if (close == NULL) break; /* NOTE BREAK */ assert(cold != NULL); @@ -447,6 +452,11 @@ cfindloop(struct vars * v, else end = longest(v, d, begin, estop, &hitend); + if (ISERR()) + { + *coldp = cold; + return v->err; + } if (hitend && cold == NULL) cold = begin; if (end == NULL) @@ -626,15 +636,22 @@ condissect(struct vars * v, mid = longest(v, d, begin, end, (int *) NULL); if (mid == NULL) { + ERR(REG_ASSERT); /* if no other error reported already */ freedfa(d); freedfa(d2); - return REG_ASSERT; + return v->err; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ while (longest(v, d2, mid, end, (int *) NULL) != end) { + if (ISERR()) + { + freedfa(d); + freedfa(d2); + return v->err; + } /* that midpoint didn't work, find a new one */ if (mid == stop) { @@ -653,9 +670,10 @@ condissect(struct vars * v, { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); + ERR(REG_ASSERT); /* if no other error reported already */ freedfa(d); freedfa(d2); - return REG_ASSERT; + return v->err; } MDEBUG(("new midpoint %ld\n", LOFF(mid))); } @@ -690,8 +708,7 @@ altdissect(struct vars * v, MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - if (ISERR()) - return v->err; + NOERR(); if (longest(v, d, begin, end, (int *) NULL) == end) { MDEBUG(("success\n")); @@ -699,6 +716,7 @@ altdissect(struct vars * v, return dissect(v, t->left, begin, end); } freedfa(d); + NOERR(); } return REG_ASSERT; /* none of them matched?!? */ } @@ -804,6 +822,7 @@ ccondissect(struct vars * v, { freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); @@ -843,12 +862,13 @@ ccondissect(struct vars * v, } /* that midpoint didn't work, find a new one */ - if (mid == begin) + if (ISERR() || mid == begin) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } mid = longest(v, d, begin, mid - 1, (int *) NULL); @@ -858,6 +878,7 @@ ccondissect(struct vars * v, MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); @@ -911,6 +932,7 @@ crevdissect(struct vars * v, { freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); @@ -950,12 +972,13 @@ crevdissect(struct vars * v, } /* that midpoint didn't work, find a new one */ - if (mid == end) + if (ISERR() || mid == end) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); @@ -965,6 +988,7 @@ crevdissect(struct vars * v, MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); + NOERR(); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); @@ -1096,6 +1120,7 @@ caltdissect(struct vars * v, if (longest(v, d, begin, end, (int *) NULL) != end) { freedfa(d); + NOERR(); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); } |