aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/tsvector_op.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/tsvector_op.c')
-rw-r--r--src/backend/utils/adt/tsvector_op.c181
1 files changed, 178 insertions, 3 deletions
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index c594c18a3ae..a38db4697d3 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -71,6 +71,10 @@ typedef struct
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
uint32 flags,
TSExecuteCallback chkcond);
+static bool TS_execute_locations_recurse(QueryItem *curitem,
+ void *arg,
+ TSExecuteCallback chkcond,
+ List **locations);
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
@@ -1571,10 +1575,10 @@ TS_phrase_output(ExecPhraseData *data,
* In addition to the same arguments used for TS_execute, the caller may pass
* a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
* match position info on success. data == NULL if no position data need be
- * returned. (In practice, outside callers pass NULL, and only the internal
- * recursion cases pass a data pointer.)
+ * returned.
* Note: the function assumes data != NULL for operators other than OP_PHRASE.
- * This is OK because an outside call always starts from an OP_PHRASE node.
+ * This is OK because an outside call always starts from an OP_PHRASE node,
+ * and all internal recursion cases pass data != NULL.
*
* The detailed semantics of the match data, given that the function returned
* TS_YES (successful match), are:
@@ -1972,6 +1976,177 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
}
/*
+ * Evaluate tsquery and report locations of matching terms.
+ *
+ * This is like TS_execute except that it returns match locations not just
+ * success/failure status. The callback function is required to provide
+ * position data (we report failure if it doesn't).
+ *
+ * On successful match, the result is a List of ExecPhraseData structs, one
+ * for each AND'ed term or phrase operator in the query. Each struct includes
+ * a sorted array of lexeme positions matching that term. (Recall that for
+ * phrase operators, the match includes width+1 lexemes, and the recorded
+ * position is that of the rightmost lexeme.)
+ *
+ * OR subexpressions are handled by union'ing their match locations into a
+ * single List element, which is valid since any of those locations contains
+ * a match. However, when some of the OR'ed terms are phrase operators, we
+ * report the maximum width of any of the OR'ed terms, making such cases
+ * slightly imprecise in the conservative direction. (For example, if the
+ * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
+ * reported as though it includes the lexeme to the left of C.)
+ *
+ * Locations of NOT subexpressions are not reported. (Obviously, there can
+ * be no successful NOT matches at top level, or the match would have failed.
+ * So this amounts to ignoring NOTs underneath ORs.)
+ *
+ * The result is NIL if no match, or if position data was not returned.
+ *
+ * Arguments are the same as for TS_execute, although flags is currently
+ * vestigial since none of the defined bits are sensible here.
+ */
+List *
+TS_execute_locations(QueryItem *curitem, void *arg,
+ uint32 flags,
+ TSExecuteCallback chkcond)
+{
+ List *result;
+
+ /* No flags supported, as yet */
+ Assert(flags == TS_EXEC_EMPTY);
+ if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
+ return result;
+ return NIL;
+}
+
+/*
+ * TS_execute_locations recursion for operators above any phrase operator.
+ * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
+ */
+static bool
+TS_execute_locations_recurse(QueryItem *curitem, void *arg,
+ TSExecuteCallback chkcond,
+ List **locations)
+{
+ bool lmatch,
+ rmatch;
+ List *llocations,
+ *rlocations;
+ ExecPhraseData *data;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ /* ... and let's check for query cancel while we're at it */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Default locations result is empty */
+ *locations = NIL;
+
+ if (curitem->type == QI_VAL)
+ {
+ data = palloc0_object(ExecPhraseData);
+ if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
+ {
+ *locations = list_make1(data);
+ return true;
+ }
+ pfree(data);
+ return false;
+ }
+
+ switch (curitem->qoperator.oper)
+ {
+ case OP_NOT:
+ if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
+ &llocations))
+ return true; /* we don't pass back any locations */
+ return false;
+
+ case OP_AND:
+ if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
+ arg, chkcond,
+ &llocations))
+ return false;
+ if (!TS_execute_locations_recurse(curitem + 1,
+ arg, chkcond,
+ &rlocations))
+ return false;
+ *locations = list_concat(llocations, rlocations);
+ return true;
+
+ case OP_OR:
+ lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
+ arg, chkcond,
+ &llocations);
+ rmatch = TS_execute_locations_recurse(curitem + 1,
+ arg, chkcond,
+ &rlocations);
+ if (lmatch || rmatch)
+ {
+ /*
+ * We generate an AND'able location struct from each
+ * combination of sub-matches, following the disjunctive law
+ * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
+ *
+ * However, if either input didn't produce locations (i.e., it
+ * failed or was a NOT), we must just return the other list.
+ */
+ if (llocations == NIL)
+ *locations = rlocations;
+ else if (rlocations == NIL)
+ *locations = llocations;
+ else
+ {
+ ListCell *ll;
+
+ foreach(ll, llocations)
+ {
+ ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
+ ListCell *lr;
+
+ foreach(lr, rlocations)
+ {
+ ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
+
+ data = palloc0_object(ExecPhraseData);
+ (void) TS_phrase_output(data, ldata, rdata,
+ TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
+ 0, 0,
+ ldata->npos + rdata->npos);
+ /* Report the larger width, as explained above. */
+ data->width = Max(ldata->width, rdata->width);
+ *locations = lappend(*locations, data);
+ }
+ }
+ }
+
+ return true;
+ }
+ return false;
+
+ case OP_PHRASE:
+ /* We can hand this off to TS_phrase_execute */
+ data = palloc0_object(ExecPhraseData);
+ if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
+ data) == TS_YES)
+ {
+ if (!data->negate)
+ *locations = list_make1(data);
+ return true;
+ }
+ pfree(data);
+ return false;
+
+ default:
+ elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
+ }
+
+ /* not reachable, but keep compiler quiet */
+ return false;
+}
+
+/*
* Detect whether a tsquery boolean expression requires any positive matches
* to values shown in the tsquery.
*