diff options
author | Andrew Dunstan <andrew@dunslane.net> | 2017-03-31 14:26:03 -0400 |
---|---|---|
committer | Andrew Dunstan <andrew@dunslane.net> | 2017-03-31 14:26:03 -0400 |
commit | e306df7f9cd6b4433273e006df11bdc966b7079e (patch) | |
tree | 9afb3d3ca47524d12b936c2fdfa37427eeaf62ea /src/backend | |
parent | c80b9920fcbcbf75e3d7e8fe092bf6e15d9d40b8 (diff) | |
download | postgresql-e306df7f9cd6b4433273e006df11bdc966b7079e.tar.gz postgresql-e306df7f9cd6b4433273e006df11bdc966b7079e.zip |
Full Text Search support for json and jsonb
The new functions are json and jsonb variants of ts_headline() and to_tsvector().
Dmitry Dolgov, edited and documented by me.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/tsearch/to_tsany.c | 138 | ||||
-rw-r--r-- | src/backend/tsearch/wparser.c | 190 |
2 files changed, 328 insertions, 0 deletions
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index 398a781c037..93c08bcf85e 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -16,6 +16,7 @@ #include "tsearch/ts_cache.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" +#include "utils/jsonapi.h" typedef struct MorphOpaque @@ -24,6 +25,14 @@ typedef struct MorphOpaque int qoperator; /* query operator */ } MorphOpaque; +typedef struct TSVectorBuildState +{ + ParsedText *prs; + TSVector result; + Oid cfgId; +} TSVectorBuildState; + +static void add_to_tsvector(void *state, char *elem_value, int elem_len); Datum get_current_ts_config(PG_FUNCTION_ARGS) @@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS) PointerGetDatum(in))); } +Datum +jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + Jsonb *jb = PG_GETARG_JSONB(1); + TSVectorBuildState state; + ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText)); + + prs->words = NULL; + state.result = NULL; + state.cfgId = cfgId; + state.prs = prs; + + iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector); + + PG_FREE_IF_COPY(jb, 1); + + if (state.result == NULL) + { + /* There weren't any string elements in jsonb, + * so wee need to return an empty vector */ + + if (prs->words != NULL) + pfree(prs->words); + + state.result = palloc(CALCDATASIZE(0, 0)); + SET_VARSIZE(state.result, CALCDATASIZE(0, 0)); + state.result->size = 0; + } + + PG_RETURN_TSVECTOR(state.result); +} + +Datum +jsonb_to_tsvector(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid, + ObjectIdGetDatum(cfgId), + JsonbGetDatum(jb))); +} + +Datum +json_to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + text *json = PG_GETARG_TEXT_P(1); + TSVectorBuildState state; + ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText)); + + prs->words = 
NULL; + state.result = NULL; + state.cfgId = cfgId; + state.prs = prs; + + iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector); + + PG_FREE_IF_COPY(json, 1); + if (state.result == NULL) + { + /* There weren't any string elements in json, + * so wee need to return an empty vector */ + + if (prs->words != NULL) + pfree(prs->words); + + state.result = palloc(CALCDATASIZE(0, 0)); + SET_VARSIZE(state.result, CALCDATASIZE(0, 0)); + state.result->size = 0; + } + + PG_RETURN_TSVECTOR(state.result); +} + +Datum +json_to_tsvector(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(json))); +} + +/* + * Extend current TSVector from _state with a new one, + * build over a json(b) element. + */ +static void +add_to_tsvector(void *_state, char *elem_value, int elem_len) +{ + TSVectorBuildState *state = (TSVectorBuildState *) _state; + ParsedText *prs = state->prs; + TSVector item_vector; + int i; + + prs->lenwords = elem_len / 6; + if (prs->lenwords == 0) + prs->lenwords = 2; + + prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords); + prs->curwords = 0; + prs->pos = 0; + + parsetext(state->cfgId, prs, elem_value, elem_len); + + if (prs->curwords) + { + if (state->result != NULL) + { + for (i = 0; i < prs->curwords; i++) + prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP; + + item_vector = make_tsvector(prs); + + state->result = (TSVector) DirectFunctionCall2(tsvector_concat, + TSVectorGetDatum(state->result), + PointerGetDatum(item_vector)); + } + else + state->result = make_tsvector(prs); + } +} + /* * to_tsquery */ diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c index d8f2f65542b..c19937d644a 100644 --- a/src/backend/tsearch/wparser.c +++ b/src/backend/tsearch/wparser.c @@ -20,6 +20,7 @@ #include "tsearch/ts_cache.h" #include 
"tsearch/ts_utils.h" #include "utils/builtins.h" +#include "utils/jsonapi.h" #include "utils/varlena.h" @@ -31,6 +32,19 @@ typedef struct LexDescr *list; } TSTokenTypeStorage; +/* state for ts_headline_json_* */ +typedef struct HeadlineJsonState +{ + HeadlineParsedText *prs; + TSConfigCacheEntry *cfg; + TSParserCacheEntry *prsobj; + TSQuery query; + List *prsoptions; + bool transformed; +} HeadlineJsonState; + +static text * headline_json_value(void *_state, char *elem_value, int elem_len); + static void tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid) { @@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS) PG_GETARG_DATUM(1), PG_GETARG_DATUM(2))); } + +Datum +ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS) +{ + Jsonb *out, *jb = PG_GETARG_JSONB(1); + TSQuery query = PG_GETARG_TSQUERY(2); + text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL; + JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; + + HeadlineParsedText prs; + HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); + + memset(&prs, 0, sizeof(HeadlineParsedText)); + prs.lenwords = 32; + prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); + + state->prs = &prs; + state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0)); + state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); + state->query = query; + if (opt) + state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); + else + state->prsoptions = NIL; + + if (!OidIsValid(state->prsobj->headlineOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("text search parser does not support headline creation"))); + + out = transform_jsonb_string_values(jb, state, action); + + PG_FREE_IF_COPY(jb, 1); + PG_FREE_IF_COPY(query, 2); + if (opt) + PG_FREE_IF_COPY(opt, 3); + + pfree(prs.words); + + if (state->transformed) + { + pfree(prs.startsel); + pfree(prs.stopsel); + } + + PG_RETURN_JSONB(out); +} + +Datum 
+ts_headline_jsonb(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1))); +} + +Datum +ts_headline_jsonb_byid(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum +ts_headline_jsonb_opt(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum +ts_headline_json_byid_opt(PG_FUNCTION_ARGS) +{ + text *json = PG_GETARG_TEXT_P(1); + TSQuery query = PG_GETARG_TSQUERY(2); + text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL; + text *out; + JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; + + HeadlineParsedText prs; + HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); + + memset(&prs, 0, sizeof(HeadlineParsedText)); + prs.lenwords = 32; + prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); + + state->prs = &prs; + state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0)); + state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); + state->query = query; + if (opt) + state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); + else + state->prsoptions = NIL; + + if (!OidIsValid(state->prsobj->headlineOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("text search parser does not support headline creation"))); + + out = transform_json_string_values(json, state, action); + + PG_FREE_IF_COPY(json, 1); + PG_FREE_IF_COPY(query, 2); + if (opt) + PG_FREE_IF_COPY(opt, 3); + pfree(prs.words); + + if (state->transformed) + { + pfree(prs.startsel); + pfree(prs.stopsel); + } + + PG_RETURN_TEXT_P(out); +} + +Datum +ts_headline_json(PG_FUNCTION_ARGS) +{ + 
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1))); +} + +Datum +ts_headline_json_byid(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum +ts_headline_json_opt(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + + +/* + * Return headline in text from, generated from a json(b) element + */ +static text * +headline_json_value(void *_state, char *elem_value, int elem_len) +{ + HeadlineJsonState *state = (HeadlineJsonState *) _state; + + HeadlineParsedText *prs = state->prs; + TSConfigCacheEntry *cfg = state->cfg; + TSParserCacheEntry *prsobj = state->prsobj; + TSQuery query = state->query; + List *prsoptions = state->prsoptions; + + prs->curwords = 0; + hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len); + FunctionCall3(&(prsobj->prsheadline), + PointerGetDatum(prs), + PointerGetDatum(prsoptions), + PointerGetDatum(query)); + + state->transformed = true; + return generateHeadline(prs); +} |