about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/tsearch/wparser_def.c 57
-rw-r--r-- src/backend/utils/adt/tsvector_op.c 3
2 files changed, 35 insertions, 25 deletions
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 76b6f9aef03..7b29062a97e 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -2003,24 +2003,14 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
* Returns -1 if no such index
*/
static int
-hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos)
+hlFirstIndex(HeadlineParsedText *prs, int pos)
{
int i;
- /* For each word ... */
for (i = pos; i < prs->curwords; i++)
{
- /* ... scan the query to see if this word matches any operand */
- QueryItem *item = GETQUERY(query);
- int j;
-
- for (j = 0; j < query->size; j++)
- {
- if (item->type == QI_VAL &&
- prs->words[i].item == &item->qoperand)
- return i;
- item++;
- }
+ if (prs->words[i].item != NULL)
+ return i;
}
return -1;
}
@@ -2028,8 +2018,14 @@ hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos)
/*
* hlCover: try to find a substring of prs' word list that satisfies query
*
- * At entry, *p must be the first word index to consider (initialize this to
- * zero, or to the next index after a previous successful search).
+ * At entry, *p must be the first word index to consider (initialize this
+ * to zero, or to the next index after a previous successful search).
+ * We will consider all substrings starting at or after that word, and
+ * containing no more than max_cover words. (We need a length limit to
+ * keep this from taking O(N^2) time for a long document with many query
+ * words but few complete matches. Actually, since checkcondition_HL is
+ * roughly O(N) in the length of the substring being checked, it's even
+ * worse than that.)
*
* On success, sets *p to first word index and *q to last word index of the
* cover substring, and returns true.
@@ -2038,7 +2034,8 @@ hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos)
* words used in the query.
*/
static bool
-hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
+hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover,
+ int *p, int *q)
{
int pmin,
pmax,
@@ -2052,7 +2049,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
* appearing in the query; there's no point in trying endpoints in between
* such points.
*/
- pmin = hlFirstIndex(prs, query, *p);
+ pmin = hlFirstIndex(prs, *p);
while (pmin >= 0)
{
/* This useless assignment just keeps stupider compilers quiet */
@@ -2073,7 +2070,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
return true;
}
/* Nope, so advance pmax to next feasible endpoint */
- nextpmax = hlFirstIndex(prs, query, pmax + 1);
+ nextpmax = hlFirstIndex(prs, pmax + 1);
/*
* If this is our first advance past pmin, then the result is also
@@ -2084,7 +2081,7 @@ hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
nextpmin = nextpmax;
pmax = nextpmax;
}
- while (pmax >= 0);
+ while (pmax >= 0 && pmax - pmin < max_cover);
/* No luck here, so try next feasible startpoint */
pmin = nextpmin;
}
@@ -2186,7 +2183,7 @@ get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
static void
mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
int shortword, int min_words,
- int max_words, int max_fragments)
+ int max_words, int max_fragments, int max_cover)
{
int32 poslen,
curlen,
@@ -2213,7 +2210,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
covers = palloc(maxcovers * sizeof(CoverPos));
/* get all covers */
- while (hlCover(prs, query, &p, &q))
+ while (hlCover(prs, query, max_cover, &p, &q))
{
startpos = p;
endpos = q;
@@ -2368,7 +2365,7 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
*/
static void
mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
- int shortword, int min_words, int max_words)
+ int shortword, int min_words, int max_words, int max_cover)
{
int p = 0,
q = 0;
@@ -2386,7 +2383,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
if (!highlightall)
{
/* examine all covers, select a headline using the best one */
- while (hlCover(prs, query, &p, &q))
+ while (hlCover(prs, query, max_cover, &p, &q))
{
/*
* Count words (curlen) and interesting words (poslen) within
@@ -2542,6 +2539,7 @@ prsd_headline(PG_FUNCTION_ARGS)
int shortword = 3;
int max_fragments = 0;
bool highlightall = false;
+ int max_cover;
ListCell *l;
/* Extract configuration option values */
@@ -2581,6 +2579,15 @@ prsd_headline(PG_FUNCTION_ARGS)
defel->defname)));
}
+ /*
+ * We might eventually make max_cover a user-settable parameter, but for
+ * now, just compute a reasonable value based on max_words and
+ * max_fragments.
+ */
+ max_cover = Max(max_words * 10, 100);
+ if (max_fragments > 0)
+ max_cover *= max_fragments;
+
/* in HighlightAll mode these parameters are ignored */
if (!highlightall)
{
@@ -2605,10 +2612,10 @@ prsd_headline(PG_FUNCTION_ARGS)
/* Apply appropriate headline selector */
if (max_fragments == 0)
mark_hl_words(prs, query, highlightall, shortword,
- min_words, max_words);
+ min_words, max_words, max_cover);
else
mark_hl_fragments(prs, query, highlightall, shortword,
- min_words, max_words, max_fragments);
+ min_words, max_words, max_fragments, max_cover);
/* Fill in default values for string options */
if (!prs->startsel)
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index f01b1ee2537..756a48a167a 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1868,6 +1868,9 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
+ /* ... and let's check for query cancel while we're at it */
+ CHECK_FOR_INTERRUPTS();
+
if (curitem->type == QI_VAL)
return chkcond(arg, (QueryOperand *) curitem,
NULL /* don't need position info */ );