aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2022-11-21 17:07:07 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2022-11-21 17:07:07 -0500
commit: 1aed4c4fd22b5643ac681be3d9617c4c7b1b0a19 (patch)
tree: 9d73db08f30580512db2667f58b45fbb555de904 /src/backend
parent: 4cbcb7ed85b0e235300059dbbbdb83aec4ec8eaf (diff)
download: postgresql-1aed4c4fd22b5643ac681be3d9617c4c7b1b0a19.tar.gz
download: postgresql-1aed4c4fd22b5643ac681be3d9617c4c7b1b0a19.zip
Add comments and a missing CHECK_FOR_INTERRUPTS in ts_headline.
I just spent an annoying amount of time reverse-engineering the 100%-undocumented API between ts_headline and the text search parser's prsheadline function. Add some commentary about that while it's fresh in mind. Also remove some unused macros in wparser_def.c.

While at it, I noticed that when commit 78e73e875 added a CHECK_FOR_INTERRUPTS call in TS_execute_recurse, it missed doing so in the parallel function TS_phrase_execute, which surely needs one just as much.

Back-patch because of the missing CHECK_FOR_INTERRUPTS. Might as well back-patch the rest of this too.
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/tsearch/ts_parse.c 13
-rw-r--r-- src/backend/tsearch/wparser_def.c 4
-rw-r--r-- src/backend/utils/adt/tsvector_op.c 3
3 files changed, 16 insertions, 4 deletions
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
index 171ade273bf..190270ab3d6 100644
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -437,6 +437,8 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
/*
* Headline framework
*/
+
+/* Add a word to prs->words[] */
static void
hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
{
@@ -453,6 +455,14 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
prs->curwords++;
}
+/*
+ * Add pos and matching-query-item data to the just-added word.
+ * Here, buf/buflen represent a processed lexeme, not raw token text.
+ *
+ * If the query contains more than one matching item, we replicate
+ * the last-added word so that each item can be pointed to. The
+ * duplicate entries are marked with repeated = 1.
+ */
static void
hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
{
@@ -594,6 +604,9 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
}
+/*
+ * Generate the headline, as a text object, from HeadlineParsedText.
+ */
text *
generateHeadline(HeadlineParsedText *prs)
{
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 70824ebf640..53a545863aa 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -1921,10 +1921,6 @@ prsd_end(PG_FUNCTION_ARGS)
*/
/* token type classification macros */
-#define LEAVETOKEN(x) ( (x)==SPACE )
-#define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
-#define ENDPUNCTOKEN(x) ( (x)==SPACE )
-
#define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
#define HLIDREPLACE(x) ( (x)==TAG_T )
#define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 143a37f3788..a6fcc29b118 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1607,6 +1607,9 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
+ /* ... and let's check for query cancel while we're at it */
+ CHECK_FOR_INTERRUPTS();
+
if (curitem->type == QI_VAL)
{
if (!chkcond(arg, (QueryOperand *) curitem, data))