diff options
Diffstat (limited to 'src/backend/regex/rege_dfa.c')
-rw-r--r-- | src/backend/regex/rege_dfa.c | 145 |
1 files changed, 115 insertions, 30 deletions
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index d367a77e854..a9305dd7ccd 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -34,9 +34,12 @@ /* * longest - longest-preferred matching engine + * + * On success, returns match endpoint address. Returns NULL on no match. + * Internal errors also return NULL, with v->err set. */ -static chr * /* endpoint, or NULL */ -longest(struct vars * v, /* used only for debug and exec flags */ +static chr * +longest(struct vars * v, struct dfa * d, chr *start, /* where the match should start */ chr *stop, /* match must end at or before here */ @@ -51,11 +54,15 @@ longest(struct vars * v, /* used only for debug and exec flags */ int i; struct colormap *cm = d->cm; + /* prevent "uninitialized variable" warnings */ + if (hitstopp != NULL) + *hitstopp = 0; + /* initialize */ css = initialize(v, d, start); + if (css == NULL) + return NULL; cp = start; - if (hitstopp != NULL) - *hitstopp = 0; /* startup */ FDEBUG(("+++ startup +++\n")); @@ -74,8 +81,14 @@ longest(struct vars * v, /* used only for debug and exec flags */ return NULL; css->lastseen = cp; - /* main loop */ + /* + * This is the main text-scanning loop. It seems worth having two copies + * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG + * builds, when you're not actively tracing. + */ +#ifdef REG_DEBUG if (v->eflags & REG_FTRACE) + { while (cp < realstop) { FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets))); @@ -92,7 +105,10 @@ longest(struct vars * v, /* used only for debug and exec flags */ ss->lastseen = cp; css = ss; } + } else +#endif + { while (cp < realstop) { co = GETCOLOR(cm, *cp); @@ -107,6 +123,10 @@ longest(struct vars * v, /* used only for debug and exec flags */ ss->lastseen = cp; css = ss; } + } + + if (ISERR()) + return NULL; /* shutdown */ FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets))); @@ -117,6 +137,8 @@ longest(struct vars * v, /* used only for debug and exec flags */ co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; FDEBUG(("color %ld\n", (long) co)); ss = miss(v, d, css, co, cp, start); + if (ISERR()) + return NULL; /* special case: match ended at eol? */ if (ss != NULL && (ss->flags & POSTSTATE)) return cp; @@ -138,14 +160,17 @@ longest(struct vars * v, /* used only for debug and exec flags */ /* * shortest - shortest-preferred matching engine + * + * On success, returns match endpoint address. Returns NULL on no match. + * Internal errors also return NULL, with v->err set. */ -static chr * /* endpoint, or NULL */ +static chr * shortest(struct vars * v, struct dfa * d, chr *start, /* where the match should start */ chr *min, /* match must end at or after here */ chr *max, /* match must end at or before here */ - chr **coldp, /* store coldstart pointer here, if nonNULL */ + chr **coldp, /* store coldstart pointer here, if non-NULL */ int *hitstopp) /* record whether hit v->stop, if non-NULL */ { chr *cp; @@ -156,11 +181,17 @@ shortest(struct vars * v, struct sset *ss; struct colormap *cm = d->cm; + /* prevent "uninitialized variable" warnings */ + if (coldp != NULL) + *coldp = NULL; + if (hitstopp != NULL) + *hitstopp = 0; + /* initialize */ css = initialize(v, d, start); + if (css == NULL) + return NULL; cp = start; - if (hitstopp != NULL) - *hitstopp = 0; /* startup */ FDEBUG(("--- startup ---\n")); @@ -180,8 +211,14 @@ shortest(struct vars * v, css->lastseen = cp; ss = css; - /* main loop */ + /* + * This is the main text-scanning loop. It seems worth having two copies + * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG + * builds, when you're not actively tracing. + */ +#ifdef REG_DEBUG if (v->eflags & REG_FTRACE) + { while (cp < realmax) { FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets))); @@ -200,7 +237,10 @@ shortest(struct vars * v, if ((ss->flags & POSTSTATE) && cp >= realmin) break; /* NOTE BREAK OUT */ } + } else +#endif + { while (cp < realmax) { co = GETCOLOR(cm, *cp); @@ -217,6 +257,7 @@ shortest(struct vars * v, if ((ss->flags & POSTSTATE) && cp >= realmin) break; /* NOTE BREAK OUT */ } + } if (ss == NULL) return NULL; @@ -389,7 +430,7 @@ hash(unsigned *uv, * initialize - hand-craft a cache entry for startup, otherwise get ready */ static struct sset * -initialize(struct vars * v, /* used only for debug flags */ +initialize(struct vars * v, struct dfa * d, chr *start) { @@ -402,6 +443,8 @@ initialize(struct vars * v, /* used only for debug flags */ else { /* no, must (re)build it */ ss = getvacant(v, d, start, start); + if (ss == NULL) + return NULL; for (i = 0; i < d->wordsper; i++) ss->states[i] = 0; BSET(ss->states, d->cnfa->pre); @@ -420,10 +463,20 @@ initialize(struct vars * v, /* used only for debug flags */ } /* - * miss - handle a cache miss + * miss - handle a stateset cache miss + * + * css is the current stateset, co is the color of the current input character, + * cp points to the character after that (which is where we may need to test + * LACONs). start does not affect matching behavior but is needed for pickss' + * heuristics about which stateset cache entry to replace. + * + * Ordinarily, returns the address of the next stateset (the one that is + * valid after consuming the input character). Returns NULL if no valid + * NFA states remain, ie we have a certain match failure. + * Internal errors also return NULL, with v->err set. */ -static struct sset * /* NULL if goes to empty set */ -miss(struct vars * v, /* used only for debug flags */ +static struct sset * +miss(struct vars * v, struct dfa * d, struct sset * css, pcolor co, @@ -449,9 +502,23 @@ miss(struct vars * v, /* used only for debug flags */ } FDEBUG(("miss\n")); - /* first, what set of states would we end up in? */ + /* + * Checking for operation cancel in the inner text search loop seems + * unduly expensive. As a compromise, check during cache misses. + */ + if (CANCEL_REQUESTED(v->re)) + { + ERR(REG_CANCEL); + return NULL; + } + + /* + * What set of states would we end up in after consuming the co character? + * We first consider PLAIN arcs that consume the character, and then look + * to see what LACON arcs could be traversed after consuming it. + */ for (i = 0; i < d->wordsper; i++) - d->work[i] = 0; + d->work[i] = 0; /* build new stateset bitmap in d->work */ ispost = 0; noprogress = 1; gotstate = 0; @@ -468,22 +535,31 @@ miss(struct vars * v, /* used only for debug flags */ noprogress = 0; FDEBUG(("%d -> %d\n", i, ca->to)); } - dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0; + if (!gotstate) + return NULL; /* character cannot reach any new state */ + dolacons = (cnfa->flags & HASLACONS); sawlacons = 0; + /* outer loop handles transitive closure of reachable-by-LACON states */ while (dolacons) - { /* transitive closure */ + { dolacons = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(d->work, i)) for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) { if (ca->co < cnfa->ncolors) - continue; /* NOTE CONTINUE */ - sawlacons = 1; + continue; /* not a LACON arc */ if (ISBSET(d->work, ca->to)) - continue; /* NOTE CONTINUE */ + continue; /* arc would be a no-op anyway */ + sawlacons = 1; /* this LACON affects our result */ if (!lacon(v, cnfa, cp, ca->co)) - continue; /* NOTE CONTINUE */ + { + if (ISERR()) + return NULL; + continue; /* LACON arc cannot be traversed */ + } + if (ISERR()) + return NULL; BSET(d->work, ca->to); dolacons = 1; if (ca->to == cnfa->post) @@ -493,11 +569,9 @@ miss(struct vars * v, /* used only for debug flags */ FDEBUG(("%d :> %d\n", i, ca->to)); } } - if (!gotstate) - return NULL; h = HASH(d->work, d->wordsper); - /* next, is that in the cache? */ + /* Is this stateset already in the cache? */ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) if (HIT(h, d->work, p, d->wordsper)) { @@ -507,6 +581,8 @@ miss(struct vars * v, /* used only for debug flags */ if (i == 0) { /* nope, need a new cache entry */ p = getvacant(v, d, cp, start); + if (p == NULL) + return NULL; assert(p != css); for (i = 0; i < d->wordsper; i++) p->states[i] = d->work[i]; @@ -517,8 +593,15 @@ miss(struct vars * v, /* used only for debug flags */ /* lastseen to be dealt with by caller */ } + /* + * Link new stateset to old, unless a LACON affected the result, in which + * case we don't create the link. That forces future transitions across + * this same arc (same prior stateset and character color) to come through + * miss() again, so that we can recheck the LACON(s), which might or might + * not pass since context will be different. + */ if (!sawlacons) - { /* lookahead conds. always cache miss */ + { FDEBUG(("c%d[%d]->c%d\n", (int) (css - d->ssets), co, (int) (p - d->ssets))); css->outs[co] = p; @@ -562,11 +645,12 @@ lacon(struct vars * v, /* * getvacant - get a vacant state set + * * This routine clears out the inarcs and outarcs, but does not otherwise * clear the innards of the state set -- that's up to the caller. */ static struct sset * -getvacant(struct vars * v, /* used only for debug flags */ +getvacant(struct vars * v, struct dfa * d, chr *cp, chr *start) @@ -578,6 +662,8 @@ getvacant(struct vars * v, /* used only for debug flags */ color co; ss = pickss(v, d, cp, start); + if (ss == NULL) + return NULL; assert(!(ss->flags & LOCKED)); /* clear out its inarcs, including self-referential ones */ @@ -635,7 +721,7 @@ getvacant(struct vars * v, /* used only for debug flags */ * pickss - pick the next stateset to be used */ static struct sset * -pickss(struct vars * v, /* used only for debug flags */ +pickss(struct vars * v, struct dfa * d, chr *cp, chr *start) @@ -691,7 +777,6 @@ pickss(struct vars * v, /* used only for debug flags */ /* nobody's old enough?!? -- something's really wrong */ FDEBUG(("cannot find victim to replace!\n")); - assert(NOTREACHED); ERR(REG_ASSERT); - return d->ssets; + return NULL; } |