aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorAndrew Dunstan <andrew@dunslane.net>2017-03-31 14:26:03 -0400
committerAndrew Dunstan <andrew@dunslane.net>2017-03-31 14:26:03 -0400
commite306df7f9cd6b4433273e006df11bdc966b7079e (patch)
tree9afb3d3ca47524d12b936c2fdfa37427eeaf62ea /src/backend
parentc80b9920fcbcbf75e3d7e8fe092bf6e15d9d40b8 (diff)
downloadpostgresql-e306df7f9cd6b4433273e006df11bdc966b7079e.tar.gz
postgresql-e306df7f9cd6b4433273e006df11bdc966b7079e.zip
Full Text Search support for json and jsonb
The new functions are ts_headline() and to_tsvector. Dmitry Dolgov, edited and documented by me.
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/tsearch/to_tsany.c138
-rw-r--r--src/backend/tsearch/wparser.c190
2 files changed, 328 insertions, 0 deletions
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 398a781c037..93c08bcf85e 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
+#include "utils/jsonapi.h"
typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
int qoperator; /* query operator */
} MorphOpaque;
+typedef struct TSVectorBuildState
+{
+ ParsedText *prs;
+ TSVector result;
+ Oid cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
Datum
get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
+Datum
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ Jsonb *jb = PG_GETARG_JSONB(1);
+ TSVectorBuildState state;
+ ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+ prs->words = NULL;
+ state.result = NULL;
+ state.cfgId = cfgId;
+ state.prs = prs;
+
+ iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+ PG_FREE_IF_COPY(jb, 1);
+
+ if (state.result == NULL)
+ {
+ /* There weren't any string elements in jsonb,
+ * so wee need to return an empty vector */
+
+ if (prs->words != NULL)
+ pfree(prs->words);
+
+ state.result = palloc(CALCDATASIZE(0, 0));
+ SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+ state.result->size = 0;
+ }
+
+ PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB(0);
+ Oid cfgId;
+
+ cfgId = getTSCurrentConfig(true);
+ PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
+ ObjectIdGetDatum(cfgId),
+ JsonbGetDatum(jb)));
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+ Oid cfgId = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSVectorBuildState state;
+ ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+ prs->words = NULL;
+ state.result = NULL;
+ state.cfgId = cfgId;
+ state.prs = prs;
+
+ iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+ PG_FREE_IF_COPY(json, 1);
+ if (state.result == NULL)
+ {
+ /* There weren't any string elements in json,
+ * so wee need to return an empty vector */
+
+ if (prs->words != NULL)
+ pfree(prs->words);
+
+ state.result = palloc(CALCDATASIZE(0, 0));
+ SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+ state.result->size = 0;
+ }
+
+ PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_P(0);
+ Oid cfgId;
+
+ cfgId = getTSCurrentConfig(true);
+ PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
+ ObjectIdGetDatum(cfgId),
+ PointerGetDatum(json)));
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+ TSVectorBuildState *state = (TSVectorBuildState *) _state;
+ ParsedText *prs = state->prs;
+ TSVector item_vector;
+ int i;
+
+ prs->lenwords = elem_len / 6;
+ if (prs->lenwords == 0)
+ prs->lenwords = 2;
+
+ prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+ prs->curwords = 0;
+ prs->pos = 0;
+
+ parsetext(state->cfgId, prs, elem_value, elem_len);
+
+ if (prs->curwords)
+ {
+ if (state->result != NULL)
+ {
+ for (i = 0; i < prs->curwords; i++)
+ prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+ item_vector = make_tsvector(prs);
+
+ state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+ TSVectorGetDatum(state->result),
+ PointerGetDatum(item_vector));
+ }
+ else
+ state->result = make_tsvector(prs);
+ }
+}
+
/*
* to_tsquery
*/
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index d8f2f65542b..c19937d644a 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -20,6 +20,7 @@
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
+#include "utils/jsonapi.h"
#include "utils/varlena.h"
@@ -31,6 +32,19 @@ typedef struct
LexDescr *list;
} TSTokenTypeStorage;
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+ HeadlineParsedText *prs;
+ TSConfigCacheEntry *cfg;
+ TSParserCacheEntry *prsobj;
+ TSQuery query;
+ List *prsoptions;
+ bool transformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
static void
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
@@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+ Jsonb *out, *jb = PG_GETARG_JSONB(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+ JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+ HeadlineParsedText prs;
+ HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+ memset(&prs, 0, sizeof(HeadlineParsedText));
+ prs.lenwords = 32;
+ prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+ state->prs = &prs;
+ state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+ state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+ state->query = query;
+ if (opt)
+ state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+ else
+ state->prsoptions = NIL;
+
+ if (!OidIsValid(state->prsobj->headlineOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("text search parser does not support headline creation")));
+
+ out = transform_jsonb_string_values(jb, state, action);
+
+ PG_FREE_IF_COPY(jb, 1);
+ PG_FREE_IF_COPY(query, 2);
+ if (opt)
+ PG_FREE_IF_COPY(opt, 3);
+
+ pfree(prs.words);
+
+ if (state->transformed)
+ {
+ pfree(prs.startsel);
+ pfree(prs.stopsel);
+ }
+
+ PG_RETURN_JSONB(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_P(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+ text *out;
+ JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+ HeadlineParsedText prs;
+ HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+ memset(&prs, 0, sizeof(HeadlineParsedText));
+ prs.lenwords = 32;
+ prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+ state->prs = &prs;
+ state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+ state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+ state->query = query;
+ if (opt)
+ state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+ else
+ state->prsoptions = NIL;
+
+ if (!OidIsValid(state->prsobj->headlineOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("text search parser does not support headline creation")));
+
+ out = transform_json_string_values(json, state, action);
+
+ PG_FREE_IF_COPY(json, 1);
+ PG_FREE_IF_COPY(query, 2);
+ if (opt)
+ PG_FREE_IF_COPY(opt, 3);
+ pfree(prs.words);
+
+ if (state->transformed)
+ {
+ pfree(prs.startsel);
+ pfree(prs.stopsel);
+ }
+
+ PG_RETURN_TEXT_P(out);
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+
+/*
+ * Return headline in text from, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+ HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+ HeadlineParsedText *prs = state->prs;
+ TSConfigCacheEntry *cfg = state->cfg;
+ TSParserCacheEntry *prsobj = state->prsobj;
+ TSQuery query = state->query;
+ List *prsoptions = state->prsoptions;
+
+ prs->curwords = 0;
+ hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+ FunctionCall3(&(prsobj->prsheadline),
+ PointerGetDatum(prs),
+ PointerGetDatum(prsoptions),
+ PointerGetDatum(query));
+
+ state->transformed = true;
+ return generateHeadline(prs);
+}