about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- src/backend/utils/adt/tsginidx.c 133
-rw-r--r-- src/backend/utils/adt/tsgistidx.c 26
-rw-r--r-- src/backend/utils/adt/tsrank.c 12
-rw-r--r-- src/backend/utils/adt/tsvector_op.c 147
4 files changed, 146 insertions, 172 deletions
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 2d656168fca..3128f0a7da0 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -178,9 +178,13 @@ typedef struct
bool *need_recheck;
} GinChkVal;
-static GinTernaryValue
-checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
+/*
+ * TS_execute callback for matching a tsquery operand to GIN index data
+ */
+static TSTernaryValue
+checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
+ GinChkVal *gcv = (GinChkVal *) checkval;
int j;
/*
@@ -193,112 +197,22 @@ checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *d
/* convert item's number to corresponding entry's (operand's) number */
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
- /* return presence of current entry in indexed value */
- return gcv->check[j];
-}
-
-/*
- * Wrapper of check condition function for TS_execute.
- */
-static bool
-checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
-{
- return checkcondition_gin_internal((GinChkVal *) checkval,
- val,
- data) != GIN_FALSE;
-}
-
-/*
- * Evaluate tsquery boolean expression using ternary logic.
- *
- * Note: the reason we can't use TS_execute() for this is that its API
- * for the checkcondition callback doesn't allow a MAYBE result to be
- * returned, but we might have MAYBEs in the gcv->check array.
- * Perhaps we should change that API.
- */
-static GinTernaryValue
-TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
-{
- GinTernaryValue val1,
- val2,
- result;
-
- /* since this function recurses, it could be driven to stack overflow */
- check_stack_depth();
-
- if (curitem->type == QI_VAL)
- return
- checkcondition_gin_internal(gcv,
- (QueryOperand *) curitem,
- NULL /* don't have position info */ );
-
- switch (curitem->qoperator.oper)
+ /*
+ * return presence of current entry in indexed value; but TRUE becomes
+ * MAYBE in the presence of a query requiring recheck
+ */
+ if (gcv->check[j] == GIN_TRUE)
{
- case OP_NOT:
-
- /*
- * Below a phrase search, force NOT's result to MAYBE. We cannot
- * invert a TRUE result from the subexpression to FALSE, since
- * TRUE only says that the subexpression matches somewhere, not
- * that it matches everywhere, so there might be positions where
- * the NOT will match. We could invert FALSE to TRUE, but there's
- * little point in distinguishing TRUE from MAYBE, since a recheck
- * will have been forced already.
- */
- if (in_phrase)
- return GIN_MAYBE;
-
- result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
- if (result == GIN_MAYBE)
- return result;
- return !result;
-
- case OP_PHRASE:
-
- /*
- * GIN doesn't contain any information about positions, so treat
- * OP_PHRASE as OP_AND with recheck requirement, and always
- * reporting MAYBE not TRUE.
- */
- *(gcv->need_recheck) = true;
- /* Pass down in_phrase == true in case there's a NOT below */
- in_phrase = true;
-
- /* FALL THRU */
-
- case OP_AND:
- val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
- in_phrase);
- if (val1 == GIN_FALSE)
- return GIN_FALSE;
- val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
- if (val2 == GIN_FALSE)
- return GIN_FALSE;
- if (val1 == GIN_TRUE && val2 == GIN_TRUE &&
- curitem->qoperator.oper != OP_PHRASE)
- return GIN_TRUE;
- else
- return GIN_MAYBE;
-
- case OP_OR:
- val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
- in_phrase);
- if (val1 == GIN_TRUE)
- return GIN_TRUE;
- val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
- if (val2 == GIN_TRUE)
- return GIN_TRUE;
- if (val1 == GIN_FALSE && val2 == GIN_FALSE)
- return GIN_FALSE;
- else
- return GIN_MAYBE;
-
- default:
- elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
+ if (val->weight != 0 || data != NULL)
+ return TS_MAYBE;
}
- /* not reachable, but keep compiler quiet */
- return false;
+ /*
+ * We rely on GinTernaryValue and TSTernaryValue using equivalent value
+ * assignments. We could use a switch statement to map the values if that
+ * ever stops being true, but it seems unlikely to happen.
+ */
+ return (TSTernaryValue) gcv->check[j];
}
Datum
@@ -370,10 +284,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck;
- res = TS_execute_ternary(&gcv, GETQUERY(query), false);
-
- if (res == GIN_TRUE && recheck)
- res = GIN_MAYBE;
+ if (TS_execute(GETQUERY(query),
+ &gcv,
+ TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
+ checkcondition_gin))
+ res = recheck ? GIN_MAYBE : GIN_TRUE;
}
PG_RETURN_GIN_TERNARY_VALUE(res);
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index c3f25800e7b..927aed91564 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -273,9 +273,9 @@ typedef struct
} CHKVAL;
/*
- * is there value 'val' in array or not ?
+ * TS_execute callback for matching a tsquery operand to GIST leaf-page data
*/
-static bool
+static TSTernaryValue
checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
int32 *StopLow = ((CHKVAL *) checkval)->arrb;
@@ -288,23 +288,26 @@ checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
* we are not able to find a prefix by hash value
*/
if (val->prefix)
- return true;
+ return TS_MAYBE;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
if (*StopMiddle == val->valcrc)
- return true;
+ return TS_MAYBE;
else if (*StopMiddle < val->valcrc)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
- return false;
+ return TS_NO;
}
-static bool
+/*
+ * TS_execute callback for matching a tsquery operand to GIST non-leaf data
+ */
+static TSTernaryValue
checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
void *key = (SignTSVector *) checkval;
@@ -313,8 +316,12 @@ checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
* we are not able to find a prefix in signature tree
*/
if (val->prefix)
- return true;
- return GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key)));
+ return TS_MAYBE;
+
+ if (GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key))))
+ return TS_MAYBE;
+ else
+ return TS_NO;
}
Datum
@@ -339,10 +346,9 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
if (ISALLTRUE(key))
PG_RETURN_BOOL(true);
- /* since signature is lossy, cannot specify CALC_NOT here */
PG_RETURN_BOOL(TS_execute(GETQUERY(query),
key,
- TS_EXEC_PHRASE_NO_POS,
+ TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT,
checkcondition_bit));
}
else
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 07251dd577c..cbd97abccde 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -556,14 +556,18 @@ typedef struct
#define QR_GET_OPERAND_DATA(q, v) \
( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
-static bool
-checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
+/*
+ * TS_execute callback for matching a tsquery operand to QueryRepresentation
+ */
+static TSTernaryValue
+checkcondition_QueryOperand(void *checkval, QueryOperand *val,
+ ExecPhraseData *data)
{
QueryRepresentation *qr = (QueryRepresentation *) checkval;
QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
if (!opData->operandexists)
- return false;
+ return TS_NO;
if (data)
{
@@ -573,7 +577,7 @@ checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *d
data->pos += MAXQROPOS - opData->npos;
}
- return true;
+ return TS_YES;
}
typedef struct
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 51619c396c7..6df943abd4e 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -67,14 +67,6 @@ typedef struct
StatEntry *root;
} TSVectorStat;
-/* TS_execute requires ternary logic to handle NOT with phrase matches */
-typedef enum
-{
- TS_NO, /* definitely no match */
- TS_YES, /* definitely does match */
- TS_MAYBE /* can't verify match for lack of pos data */
-} TSTernaryValue;
-
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
uint32 flags,
@@ -1188,13 +1180,15 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
/*
* Check weight info or/and fill 'data' with the required positions
*/
-static bool
+static TSTernaryValue
checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
ExecPhraseData *data)
{
- bool result = false;
+ TSTernaryValue result = TS_NO;
- if (entry->haspos && (val->weight || data))
+ Assert(data == NULL || data->npos == 0);
+
+ if (entry->haspos)
{
WordEntryPosVector *posvec;
@@ -1232,7 +1226,13 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
data->npos = dptr - data->pos;
if (data->npos > 0)
- result = true;
+ result = TS_YES;
+ else
+ {
+ pfree(data->pos);
+ data->pos = NULL;
+ data->allocated = false;
+ }
}
else if (val->weight)
{
@@ -1243,40 +1243,57 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
{
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
{
- result = true;
+ result = TS_YES;
break; /* no need to go further */
}
posvec_iter++;
}
}
- else /* data != NULL */
+ else if (data)
{
data->npos = posvec->npos;
data->pos = posvec->pos;
data->allocated = false;
- result = true;
+ result = TS_YES;
+ }
+ else
+ {
+ /* simplest case: no weight check, positions not needed */
+ result = TS_YES;
}
}
else
{
- result = true;
+ /*
+ * Position info is lacking, so if the caller requires it, we can only
+ * say that maybe there is a match.
+ *
+ * Notice, however, that we *don't* check val->weight here.
+ * Historically, stripped tsvectors are considered to match queries
+ * whether or not the query has a weight restriction; that's a little
+ * dubious but we'll preserve the behavior.
+ */
+ if (data)
+ result = TS_MAYBE;
+ else
+ result = TS_YES;
}
return result;
}
/*
- * is there value 'val' in array or not ?
+ * TS_execute callback for matching a tsquery operand to plain tsvector data
*/
-static bool
+static TSTernaryValue
checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopLow = chkval->arrb;
WordEntry *StopHigh = chkval->arre;
WordEntry *StopMiddle = StopHigh;
- bool res = false;
+ TSTernaryValue res = TS_NO;
/* Loop invariant: StopLow <= val < StopHigh */
while (StopLow < StopHigh)
@@ -1302,36 +1319,69 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
StopHigh = StopMiddle;
}
- if ((!res || data) && val->prefix)
+ /*
+ * If it's a prefix search, we should also consider lexemes that the
+ * search term is a prefix of (which will necessarily immediately follow
+ * the place we found in the above loop). But we can skip them if there
+ * was a definite match on the exact term AND the caller doesn't need
+ * position info.
+ */
+ if (val->prefix && (res != TS_YES || data))
{
WordEntryPos *allpos = NULL;
int npos = 0,
totalpos = 0;
- /*
- * there was a failed exact search, so we should scan further to find
- * a prefix match. We also need to do so if caller needs position info
- */
+ /* adjust start position for corner case */
if (StopLow >= StopHigh)
StopMiddle = StopHigh;
- while ((!res || data) && StopMiddle < chkval->arre &&
+ /* we don't try to re-use any data from the initial match */
+ if (data)
+ {
+ if (data->allocated)
+ pfree(data->pos);
+ data->pos = NULL;
+ data->allocated = false;
+ data->npos = 0;
+ }
+ res = TS_NO;
+
+ while ((res != TS_YES || data) &&
+ StopMiddle < chkval->arre &&
tsCompareString(chkval->operand + val->distance,
val->length,
chkval->values + StopMiddle->pos,
StopMiddle->len,
true) == 0)
{
- if (data)
- {
- /*
- * We need to join position information
- */
- res = checkclass_str(chkval, StopMiddle, val, data);
+ TSTernaryValue subres;
+
+ subres = checkclass_str(chkval, StopMiddle, val, data);
- if (res)
+ if (subres != TS_NO)
+ {
+ if (data)
{
- while (npos + data->npos >= totalpos)
+ /*
+ * We need to join position information
+ */
+ if (subres == TS_MAYBE)
+ {
+ /*
+ * No position info for this match, so we must report
+ * MAYBE overall.
+ */
+ res = TS_MAYBE;
+ /* forget any previous positions */
+ npos = 0;
+ /* don't leak storage */
+ if (allpos)
+ pfree(allpos);
+ break;
+ }
+
+ while (npos + data->npos > totalpos)
{
if (totalpos == 0)
{
@@ -1347,22 +1397,27 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
npos += data->npos;
+
+ /* don't leak storage from individual matches */
+ if (data->allocated)
+ pfree(data->pos);
+ data->pos = NULL;
+ data->allocated = false;
+ /* it's important to reset data->npos before next loop */
+ data->npos = 0;
}
else
{
- /* at loop exit, res must be true if we found matches */
- res = (npos > 0);
+ /* Don't need positions, just handle YES/MAYBE */
+ if (subres == TS_YES || res == TS_NO)
+ res = subres;
}
}
- else
- {
- res = checkclass_str(chkval, StopMiddle, val, NULL);
- }
StopMiddle++;
}
- if (res && data)
+ if (data && npos > 0)
{
/* Sort and make unique array of found positions */
data->pos = allpos;
@@ -1370,6 +1425,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
compareWordEntryPos);
data->allocated = true;
+ res = TS_YES;
}
}
@@ -1561,14 +1617,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
check_stack_depth();
if (curitem->type == QI_VAL)
- {
- if (!chkcond(arg, (QueryOperand *) curitem, data))
- return TS_NO;
- if (data->npos > 0 || data->negate)
- return TS_YES;
- /* If we have no position data, we must return TS_MAYBE */
- return TS_MAYBE;
- }
+ return chkcond(arg, (QueryOperand *) curitem, data);
switch (curitem->qoperator.oper)
{
@@ -1821,7 +1870,7 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
if (curitem->type == QI_VAL)
return chkcond(arg, (QueryOperand *) curitem,
- NULL /* don't need position info */ ) ? TS_YES : TS_NO;
+ NULL /* don't need position info */ );
switch (curitem->qoperator.oper)
{