diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/tsginidx.c | 133 | ||||
-rw-r--r-- | src/backend/utils/adt/tsgistidx.c | 26 | ||||
-rw-r--r-- | src/backend/utils/adt/tsrank.c | 12 | ||||
-rw-r--r-- | src/backend/utils/adt/tsvector_op.c | 147 |
4 files changed, 146 insertions, 172 deletions
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index 2d656168fca..3128f0a7da0 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -178,9 +178,13 @@ typedef struct bool *need_recheck; } GinChkVal; -static GinTernaryValue -checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data) +/* + * TS_execute callback for matching a tsquery operand to GIN index data + */ +static TSTernaryValue +checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) { + GinChkVal *gcv = (GinChkVal *) checkval; int j; /* @@ -193,112 +197,22 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d /* convert item's number to corresponding entry's (operand's) number */ j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; - /* return presence of current entry in indexed value */ - return gcv->check[j]; -} - -/* - * Wrapper of check condition function for TS_execute. - */ -static bool -checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) -{ - return checkcondition_gin_internal((GinChkVal *) checkval, - val, - data) != GIN_FALSE; -} - -/* - * Evaluate tsquery boolean expression using ternary logic. - * - * Note: the reason we can't use TS_execute() for this is that its API - * for the checkcondition callback doesn't allow a MAYBE result to be - * returned, but we might have MAYBEs in the gcv->check array. - * Perhaps we should change that API. - */ -static GinTernaryValue -TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase) -{ - GinTernaryValue val1, - val2, - result; - - /* since this function recurses, it could be driven to stack overflow */ - check_stack_depth(); - - if (curitem->type == QI_VAL) - return - checkcondition_gin_internal(gcv, - (QueryOperand *) curitem, - NULL /* don't have position info */ ); - - switch (curitem->qoperator.oper) + /* + * return presence of current entry in indexed value; but TRUE becomes + * MAYBE in the presence of a query requiring recheck + */ + if (gcv->check[j] == GIN_TRUE) { - case OP_NOT: - - /* - * Below a phrase search, force NOT's result to MAYBE. We cannot - * invert a TRUE result from the subexpression to FALSE, since - * TRUE only says that the subexpression matches somewhere, not - * that it matches everywhere, so there might be positions where - * the NOT will match. We could invert FALSE to TRUE, but there's - * little point in distinguishing TRUE from MAYBE, since a recheck - * will have been forced already. - */ - if (in_phrase) - return GIN_MAYBE; - - result = TS_execute_ternary(gcv, curitem + 1, in_phrase); - if (result == GIN_MAYBE) - return result; - return !result; - - case OP_PHRASE: - - /* - * GIN doesn't contain any information about positions, so treat - * OP_PHRASE as OP_AND with recheck requirement, and always - * reporting MAYBE not TRUE. - */ - *(gcv->need_recheck) = true; - /* Pass down in_phrase == true in case there's a NOT below */ - in_phrase = true; - - /* FALL THRU */ - - case OP_AND: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, - in_phrase); - if (val1 == GIN_FALSE) - return GIN_FALSE; - val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); - if (val2 == GIN_FALSE) - return GIN_FALSE; - if (val1 == GIN_TRUE && val2 == GIN_TRUE && - curitem->qoperator.oper != OP_PHRASE) - return GIN_TRUE; - else - return GIN_MAYBE; - - case OP_OR: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, - in_phrase); - if (val1 == GIN_TRUE) - return GIN_TRUE; - val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); - if (val2 == GIN_TRUE) - return GIN_TRUE; - if (val1 == GIN_FALSE && val2 == GIN_FALSE) - return GIN_FALSE; - else - return GIN_MAYBE; - - default: - elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + if (val->weight != 0 || data != NULL) + return TS_MAYBE; } - /* not reachable, but keep compiler quiet */ - return false; + /* + * We rely on GinTernaryValue and TSTernaryValue using equivalent value + * assignments. We could use a switch statement to map the values if that + * ever stops being true, but it seems unlikely to happen. + */ + return (TSTernaryValue) gcv->check[j]; } Datum @@ -370,10 +284,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; - res = TS_execute_ternary(&gcv, GETQUERY(query), false); - - if (res == GIN_TRUE && recheck) - res = GIN_MAYBE; + if (TS_execute(GETQUERY(query), + &gcv, + TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, + checkcondition_gin)) + res = recheck ? GIN_MAYBE : GIN_TRUE; } PG_RETURN_GIN_TERNARY_VALUE(res); diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index c3f25800e7b..927aed91564 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -273,9 +273,9 @@ typedef struct } CHKVAL; /* - * is there value 'val' in array or not ? + * TS_execute callback for matching a tsquery operand to GIST leaf-page data */ -static bool +static TSTernaryValue checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) { int32 *StopLow = ((CHKVAL *) checkval)->arrb; @@ -288,23 +288,26 @@ checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) * we are not able to find a prefix by hash value */ if (val->prefix) - return true; + return TS_MAYBE; while (StopLow < StopHigh) { StopMiddle = StopLow + (StopHigh - StopLow) / 2; if (*StopMiddle == val->valcrc) - return true; + return TS_MAYBE; else if (*StopMiddle < val->valcrc) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } - return false; + return TS_NO; } -static bool +/* + * TS_execute callback for matching a tsquery operand to GIST non-leaf data + */ +static TSTernaryValue checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) { void *key = (SignTSVector *) checkval; @@ -313,8 +316,12 @@ checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) * we are not able to find a prefix in signature tree */ if (val->prefix) - return true; - return GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key))); + return TS_MAYBE; + + if (GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key)))) + return TS_MAYBE; + else + return TS_NO; } Datum @@ -339,10 +346,9 @@ gtsvector_consistent(PG_FUNCTION_ARGS) if (ISALLTRUE(key)) PG_RETURN_BOOL(true); - /* since signature is lossy, cannot specify CALC_NOT here */ PG_RETURN_BOOL(TS_execute(GETQUERY(query), key, - TS_EXEC_PHRASE_NO_POS, + TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT, checkcondition_bit)); } else diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index 07251dd577c..cbd97abccde 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -556,14 +556,18 @@ typedef struct #define QR_GET_OPERAND_DATA(q, v) \ ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) ) -static bool -checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data) +/* + * TS_execute callback for matching a tsquery operand to QueryRepresentation + */ +static TSTernaryValue +checkcondition_QueryOperand(void *checkval, QueryOperand *val, + ExecPhraseData *data) { QueryRepresentation *qr = (QueryRepresentation *) checkval; QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val); if (!opData->operandexists) - return false; + return TS_NO; if (data) { @@ -573,7 +577,7 @@ checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *d data->pos += MAXQROPOS - opData->npos; } - return true; + return TS_YES; } typedef struct diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 51619c396c7..6df943abd4e 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -67,14 +67,6 @@ typedef struct StatEntry *root; } TSVectorStat; -/* TS_execute requires ternary logic to handle NOT with phrase matches */ -typedef enum -{ - TS_NO, /* definitely no match */ - TS_YES, /* definitely does match */ - TS_MAYBE /* can't verify match for lack of pos data */ -} TSTernaryValue; - static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, @@ -1188,13 +1180,15 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) /* * Check weight info or/and fill 'data' with the required positions */ -static bool +static TSTernaryValue checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data) { - bool result = false; + TSTernaryValue result = TS_NO; - if (entry->haspos && (val->weight || data)) + Assert(data == NULL || data->npos == 0); + + if (entry->haspos) { WordEntryPosVector *posvec; @@ -1232,7 +1226,13 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, data->npos = dptr - data->pos; if (data->npos > 0) - result = true; + result = TS_YES; + else + { + pfree(data->pos); + data->pos = NULL; + data->allocated = false; + } } else if (val->weight) { @@ -1243,40 +1243,57 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, { if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter))) { - result = true; + result = TS_YES; break; /* no need to go further */ } posvec_iter++; } } - else /* data != NULL */ + else if (data) { data->npos = posvec->npos; data->pos = posvec->pos; data->allocated = false; - result = true; + result = TS_YES; + } + else + { + /* simplest case: no weight check, positions not needed */ + result = TS_YES; } } else { - result = true; + /* + * Position info is lacking, so if the caller requires it, we can only + * say that maybe there is a match. + * + * Notice, however, that we *don't* check val->weight here. + * Historically, stripped tsvectors are considered to match queries + * whether or not the query has a weight restriction; that's a little + * dubious but we'll preserve the behavior. + */ + if (data) + result = TS_MAYBE; + else + result = TS_YES; } return result; } /* - * is there value 'val' in array or not ? + * TS_execute callback for matching a tsquery operand to plain tsvector data */ -static bool +static TSTernaryValue checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) { CHKVAL *chkval = (CHKVAL *) checkval; WordEntry *StopLow = chkval->arrb; WordEntry *StopHigh = chkval->arre; WordEntry *StopMiddle = StopHigh; - bool res = false; + TSTernaryValue res = TS_NO; /* Loop invariant: StopLow <= val < StopHigh */ while (StopLow < StopHigh) @@ -1302,36 +1319,69 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) StopHigh = StopMiddle; } - if ((!res || data) && val->prefix) + /* + * If it's a prefix search, we should also consider lexemes that the + * search term is a prefix of (which will necessarily immediately follow + * the place we found in the above loop). But we can skip them if there + * was a definite match on the exact term AND the caller doesn't need + * position info. + */ + if (val->prefix && (res != TS_YES || data)) { WordEntryPos *allpos = NULL; int npos = 0, totalpos = 0; - /* - * there was a failed exact search, so we should scan further to find - * a prefix match. We also need to do so if caller needs position info - */ + /* adjust start position for corner case */ if (StopLow >= StopHigh) StopMiddle = StopHigh; - while ((!res || data) && StopMiddle < chkval->arre && + /* we don't try to re-use any data from the initial match */ + if (data) + { + if (data->allocated) + pfree(data->pos); + data->pos = NULL; + data->allocated = false; + data->npos = 0; + } + res = TS_NO; + + while ((res != TS_YES || data) && + StopMiddle < chkval->arre && tsCompareString(chkval->operand + val->distance, val->length, chkval->values + StopMiddle->pos, StopMiddle->len, true) == 0) { - if (data) - { - /* - * We need to join position information - */ - res = checkclass_str(chkval, StopMiddle, val, data); + TSTernaryValue subres; + + subres = checkclass_str(chkval, StopMiddle, val, data); - if (res) + if (subres != TS_NO) + { + if (data) { - while (npos + data->npos >= totalpos) + /* + * We need to join position information + */ + if (subres == TS_MAYBE) + { + /* + * No position info for this match, so we must report + * MAYBE overall. + */ + res = TS_MAYBE; + /* forget any previous positions */ + npos = 0; + /* don't leak storage */ + if (allpos) + pfree(allpos); + break; + } + + while (npos + data->npos > totalpos) { if (totalpos == 0) { @@ -1347,22 +1397,27 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos); npos += data->npos; + + /* don't leak storage from individual matches */ + if (data->allocated) + pfree(data->pos); + data->pos = NULL; + data->allocated = false; + /* it's important to reset data->npos before next loop */ + data->npos = 0; } else { - /* at loop exit, res must be true if we found matches */ - res = (npos > 0); + /* Don't need positions, just handle YES/MAYBE */ + if (subres == TS_YES || res == TS_NO) + res = subres; } } - else - { - res = checkclass_str(chkval, StopMiddle, val, NULL); - } StopMiddle++; } - if (res && data) + if (data && npos > 0) { /* Sort and make unique array of found positions */ data->pos = allpos; @@ -1370,6 +1425,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) data->npos = qunique(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos); data->allocated = true; + res = TS_YES; } } @@ -1561,14 +1617,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, check_stack_depth(); if (curitem->type == QI_VAL) - { - if (!chkcond(arg, (QueryOperand *) curitem, data)) - return TS_NO; - if (data->npos > 0 || data->negate) - return TS_YES; - /* If we have no position data, we must return TS_MAYBE */ - return TS_MAYBE; - } + return chkcond(arg, (QueryOperand *) curitem, data); switch (curitem->qoperator.oper) { @@ -1821,7 +1870,7 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, if (curitem->type == QI_VAL) return chkcond(arg, (QueryOperand *) curitem, - NULL /* don't need position info */ ) ? TS_YES : TS_NO; + NULL /* don't need position info */ ); switch (curitem->qoperator.oper) { |