aboutsummaryrefslogtreecommitdiff
path: root/contrib/tsearch2/ts_cfg.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tsearch2/ts_cfg.c')
-rw-r--r--contrib/tsearch2/ts_cfg.c509
1 files changed, 509 insertions, 0 deletions
diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c
new file mode 100644
index 00000000000..7c9f20c8de9
--- /dev/null
+++ b/contrib/tsearch2/ts_cfg.c
@@ -0,0 +1,509 @@
+/*
+ * interface functions to tscfg
+ * Teodor Sigaev <teodor@sigaev.ru>
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <locale.h>
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "utils/array.h"
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+
+#include "ts_cfg.h"
+#include "dict.h"
+#include "wparser.h"
+#include "snmap.h"
+#include "common.h"
+#include "tsvector.h"
+
+/*********top interface**********/
+
+/*
+ * Saved SPI plans. Each one is prepared the first time the function that
+ * needs it finds the pointer still NULL, and is reused by later calls.
+ * current_cfg_id caches the oid of the current configuration; 0 means it
+ * has not been determined yet (see get_currcfg).
+ */
+static void *plan_getcfg_bylocale=NULL;
+static void *plan_getcfg=NULL;
+static void *plan_getmap=NULL;
+static void *plan_name2id=NULL;
+static Oid current_cfg_id=0;
+
+/*
+ * init_cfg: fill *cfg with the configuration whose pg_ts_cfg oid is id.
+ *
+ * Step 1 reads prs_name from pg_ts_cfg.  Step 2 reads, for every token type
+ * of that parser, the array of dictionary names from pg_ts_cfgmap.  Rows are
+ * ordered by tokid descending, so the first row gives the size of cfg->map,
+ * which is malloc'ed and indexed by token id.  After SPI_finish() the parser
+ * name and the dictionary names are resolved to ids with name2id_prs() and
+ * name2id_dict(); the name copies were made in TopMemoryContext so that they
+ * are still usable at that point, and are pfree'd once resolved.
+ *
+ * Any failure is reported through ts_error(ERROR, ...).
+ */
+void
+init_cfg(Oid id, TSCfgInfo *cfg) {
+    Oid arg[2]={ OIDOID, OIDOID };
+    bool isnull;
+    Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
+    int stat,i,j;
+    text *ptr;
+    text *prsname=NULL;
+    MemoryContext oldcontext;
+
+    memset(cfg,0,sizeof(TSCfgInfo));
+    SPI_connect();
+    if ( !plan_getcfg ) {
+        plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
+        if ( !plan_getcfg )
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    stat = SPI_execp(plan_getcfg, pars, " ", 1);
+    if ( stat < 0 )
+        ts_error (ERROR, "SPI_execp return %d", stat);
+    if ( SPI_processed > 0 ) {
+        prsname = (text*) DatumGetPointer(
+            SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
+        );
+        /* a NULL prs_name would otherwise be dereferenced by ptextdup() */
+        if ( isnull )
+            ts_error(ERROR, "Null prs_name for tsearch cfg with id %u", id);
+        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+        prsname = ptextdup( prsname );
+        MemoryContextSwitchTo(oldcontext);
+
+        cfg->id=id;
+    } else
+        ts_error(ERROR, "No tsearch cfg with id %u", id);
+
+    /* second query takes (parser name, cfg oid) */
+    arg[0]=TEXTOID;
+    if ( !plan_getmap ) {
+        plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
+        if ( !plan_getmap )
+            ts_error(ERROR, "SPI_prepare() failed");
+    }
+
+    pars[0]=PointerGetDatum( prsname );
+    stat = SPI_execp(plan_getmap, pars, " ", 0);
+    if ( stat < 0 )
+        ts_error (ERROR, "SPI_execp return %d", stat);
+    if ( SPI_processed <= 0 )
+        ts_error(ERROR, "No parser with id %u", id);
+
+    for(i=0;i<SPI_processed;i++) {
+        int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
+        ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
+        ArrayType *a;
+
+        /* first row carries the largest tokid: size the map from it */
+        if ( !cfg->map ) {
+            cfg->len=lexid+1;
+            cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
+            if ( !cfg->map )
+                ts_error(ERROR,"No memory");
+            memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
+        }
+
+        /* isnull now refers to the dict_name column */
+        if (isnull)
+            continue;
+
+        a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );
+
+        if ( ARR_NDIM(a) != 1 )
+            ts_error(ERROR,"Wrong dimension");
+        if ( ARRNELEMS(a) < 1 )
+            continue;
+
+        cfg->map[lexid].len=ARRNELEMS(a);
+        cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
+        if ( !cfg->map[lexid].dict_id )
+            ts_error(ERROR,"No memory");
+        memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
+
+        /* temporarily store copies of the names; resolved to oids below */
+        ptr=(text*)ARR_DATA_PTR(a);
+        oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+        for(j=0;j<cfg->map[lexid].len;j++) {
+            cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
+            ptr=NEXTVAL(ptr);
+        }
+        MemoryContextSwitchTo(oldcontext);
+
+        /* free only if detoasting made a fresh copy */
+        if ( a != toasted_a )
+            pfree(a);
+    }
+
+    SPI_finish();
+    cfg->prs_id = name2id_prs( prsname );
+    pfree(prsname);
+
+    /* replace every stored dictionary name by the dictionary's oid */
+    for(i=0;i<cfg->len;i++) {
+        for(j=0;j<cfg->map[i].len;j++) {
+            ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
+            cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
+            pfree(ptr);
+        }
+    }
+}
+
+/*
+ * Cache of configurations already loaded by findcfg(), plus a
+ * name -> oid map used by name2id_cfg().
+ */
+typedef struct {
+ TSCfgInfo *last_cfg; /* entry returned by the most recent findcfg() hit, or NULL */
+ int len; /* number of initialized entries in list */
+ int reallen; /* number of entries list has room for */
+ TSCfgInfo *list; /* malloc'ed array, kept sorted by id via comparecfg */
+ SNMap name2id_map; /* configuration name -> oid cache */
+} CFGList;
+
+static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
+
+/*
+ * reset_cfg: release everything held by the configuration cache (the
+ * name map, every per-token dictionary array, every map and the list
+ * itself) and return CList to its all-zero state.
+ */
+void
+reset_cfg(void) {
+    int cfgno, tok;
+
+    freeSNMap( &(CList.name2id_map) );
+
+    for(cfgno=0; CList.list && cfgno<CList.len; cfgno++) {
+        TSCfgInfo *entry = &(CList.list[cfgno]);
+
+        if ( !entry->map )
+            continue;
+
+        for(tok=0; tok<entry->len; tok++) {
+            if ( entry->map[tok].dict_id )
+                free(entry->map[tok].dict_id);
+        }
+        free( entry->map );
+    }
+
+    if ( CList.list )
+        free(CList.list);
+
+    memset(&CList,0,sizeof(CFGList));
+}
+
+/*
+ * comparecfg: qsort/bsearch comparator ordering TSCfgInfo entries by id.
+ *
+ * Oid is unsigned, so the ids are compared explicitly: returning their
+ * difference converted to int yields the wrong sign once two ids are more
+ * than INT_MAX apart, which would corrupt the sort order.
+ */
+static int
+comparecfg(const void *a, const void *b) {
+    Oid ida = ((const TSCfgInfo*)a)->id;
+    Oid idb = ((const TSCfgInfo*)b)->id;
+
+    if ( ida == idb )
+        return 0;
+    return ( ida < idb ) ? -1 : 1;
+}
+
+/*
+ * findcfg: return the cached TSCfgInfo for configuration id, loading it
+ * with init_cfg() on first use.
+ *
+ * Lookup order: the most recently returned entry, then a binary search of
+ * the sorted cache, then a fresh load appended to the cache (which is
+ * re-sorted afterwards).  init_cfg() reports failures by raising an error,
+ * so this function only returns for a fully initialized entry.
+ */
+TSCfgInfo *
+findcfg(Oid id) {
+    TSCfgInfo key;
+
+    /* last used cfg */
+    if ( CList.last_cfg && CList.last_cfg->id==id )
+        return CList.last_cfg;
+
+    /* already used cfg */
+    key.id=id;
+    if ( CList.len != 0 ) {
+        CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+        if ( CList.last_cfg != NULL )
+            return CList.last_cfg;
+    }
+
+    /*
+     * Never leave last_cfg pointing at the slot being filled: if init_cfg()
+     * errors out after storing the id, the fast path above would otherwise
+     * hand out a half-initialized entry on the next call.
+     */
+    CList.last_cfg = NULL;
+
+    /* last chance: load it, growing the array first if it is full */
+    if ( CList.len==CList.reallen ) {
+        TSCfgInfo *tmp;
+        int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
+        tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
+        if ( !tmp )
+            ts_error(ERROR,"No memory");
+        CList.reallen=reallen;
+        CList.list=tmp;
+    }
+    init_cfg(id, &(CList.list[CList.len]));
+    CList.len++;
+    qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+
+    /* qsort changed the order, so locate the new entry again */
+    CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
+    return CList.last_cfg;
+}
+
+
+/*
+ * name2id_cfg: translate a configuration name into its pg_ts_cfg oid.
+ *
+ * The name -> oid cache in CList is consulted first; on a miss the oid is
+ * read from pg_ts_cfg through SPI and remembered in the cache.  Raises an
+ * error if the query fails, finds no row, or yields a NULL oid.
+ */
+Oid
+name2id_cfg(text *name) {
+    Oid argtypes[1];
+    Datum values[1];
+    bool isnull;
+    int rc;
+    Oid cfgid;
+
+    cfgid = findSNMap_t( &(CList.name2id_map), name );
+    if ( cfgid )
+        return cfgid;
+
+    argtypes[0] = TEXTOID;
+    values[0] = PointerGetDatum(name);
+
+    SPI_connect();
+    if ( plan_name2id == NULL ) {
+        plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, argtypes ) );
+        if ( plan_name2id == NULL )
+            elog(ERROR, "SPI_prepare() failed");
+    }
+
+    rc = SPI_execp(plan_name2id, values, " ", 1);
+    if ( rc < 0 )
+        elog (ERROR, "SPI_execp return %d", rc);
+    if ( !(SPI_processed > 0) )
+        elog(ERROR, "No tsearch config");
+
+    cfgid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+    if ( isnull )
+        elog(ERROR, "Null id for tsearch config");
+
+    SPI_finish();
+    addSNMap_t( &(CList.name2id_map), name, cfgid );
+    return cfgid;
+}
+
+
+/*
+ * parsetext_v2: tokenize buf with the parser of cfg and append the
+ * normalized lexemes to prs->words.
+ *
+ * For every token the dictionaries mapped to its type are tried in order;
+ * the first one that returns a non-NULL result wins.  Its (possibly empty)
+ * NULL-terminated list of strings is appended to prs->words, all sharing
+ * one position; prs->pos is advanced once per recognized token.  The
+ * strings themselves are handed over to prs->words, only the list holding
+ * them is freed.  Token types outside cfg->map are skipped.
+ *
+ * Raises an error if a token is MAXSTRLEN bytes or longer.
+ */
+void
+parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
+    int type, lenlemm, i;
+    char *lemm=NULL;
+    WParserInfo *prsobj = findprs(cfg->prs_id);
+
+    prsobj->prs=(void*)DatumGetPointer(
+        FunctionCall2(
+            &(prsobj->start_info),
+            PointerGetDatum(buf),
+            Int32GetDatum(buflen)
+        )
+    );
+
+    /* the parser returns token type 0 when the input is exhausted */
+    while( ( type=DatumGetInt32(FunctionCall3(
+            &(prsobj->getlexeme_info),
+            PointerGetDatum(prsobj->prs),
+            PointerGetDatum(&lemm),
+            PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+        if ( lenlemm >= MAXSTRLEN )
+            elog(ERROR, "Word is too long");
+
+        if ( type >= cfg->len ) /* skip this type of lexem */
+            continue;
+
+        for(i=0;i<cfg->map[type].len;i++) {
+            DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+            char **norms, **ptr;
+
+            /* lenlemm is an integer, so it travels as an int32 Datum */
+            norms = ptr = (char**)DatumGetPointer(
+                FunctionCall3(
+                    &(dict->lexize_info),
+                    PointerGetDatum(dict->dictionary),
+                    PointerGetDatum(lemm),
+                    Int32GetDatum(lenlemm)
+                )
+            );
+            if ( !norms ) /* dictionary doesn't know this lexem */
+                continue;
+
+            prs->pos++; /*set pos*/
+
+            while( *ptr ) {
+                if (prs->curwords == prs->lenwords) {
+                    prs->lenwords *= 2;
+                    prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
+                }
+
+                prs->words[prs->curwords].len = strlen(*ptr);
+                prs->words[prs->curwords].word = *ptr;
+                prs->words[prs->curwords].alen = 0;
+                prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+                ptr++;
+                prs->curwords++;
+            }
+            pfree(norms);
+            break; /* lexem already normalized or is stop word*/
+        }
+    }
+
+    FunctionCall1(
+        &(prsobj->end_info),
+        PointerGetDatum(prsobj->prs)
+    );
+}
+
+/*
+ * hladdword: append a copy of the buflen bytes at buf to prs->words as a
+ * new, otherwise zeroed, HLWORD of the given token type, doubling the
+ * array as needed.
+ */
+static void
+hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
+    HLWORD *slot;
+
+    while (prs->curwords >= prs->lenwords) {
+        prs->lenwords *= 2;
+        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+    }
+
+    /* safe to take the address now: the array no longer moves */
+    slot = &(prs->words[prs->curwords]);
+    memset( slot, 0, sizeof(HLWORD) );
+    slot->type = (uint8)type;
+    slot->len = buflen;
+    slot->word = palloc(buflen);
+    memcpy(slot->word, buf, buflen);
+
+    prs->curwords++;
+}
+
+/*
+ * hlfinditem: link the most recently added word of prs to every VAL item
+ * of query whose operand equals the buflen bytes at buf.
+ *
+ * The first matching item is stored in the word itself.  For each further
+ * match a copy of the word is appended to prs->words, pointing at that
+ * item and flagged as repeated.
+ */
+static void
+hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
+    int i;
+    ITEM *item=GETQUERY(query);
+    HLWORD *word;
+
+    /* make room for the worst case: one extra entry per query item */
+    while (prs->curwords + query->size >= prs->lenwords) {
+        prs->lenwords *= 2;
+        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
+    }
+
+    /*
+     * Take the address only after the array has been grown: repalloc may
+     * move prs->words, which would leave an earlier pointer dangling.
+     */
+    word=&( prs->words[prs->curwords-1] );
+
+    for(i=0; i<query->size; i++) {
+        if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
+            if ( word->item ) {
+                memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
+                prs->words[prs->curwords].item=item;
+                prs->words[prs->curwords].repeated=1;
+                prs->curwords++;
+            } else
+                word->item=item;
+        }
+        item++;
+    }
+}
+
+/*
+ * hlparsetext: tokenize buf with the parser of cfg for headline
+ * generation.
+ *
+ * Every token, whatever its type, is appended to prs->words by
+ * hladdword().  Tokens whose type has dictionaries mapped are normalized
+ * by the first dictionary that returns a non-NULL result, and each
+ * normalized form is matched against query by hlfinditem().  The
+ * normalized strings and the list holding them are freed here.
+ *
+ * Raises an error if a token is MAXSTRLEN bytes or longer.
+ */
+void
+hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
+    int type, lenlemm, i;
+    char *lemm=NULL;
+    WParserInfo *prsobj = findprs(cfg->prs_id);
+
+    prsobj->prs=(void*)DatumGetPointer(
+        FunctionCall2(
+            &(prsobj->start_info),
+            PointerGetDatum(buf),
+            Int32GetDatum(buflen)
+        )
+    );
+
+    /* the parser returns token type 0 when the input is exhausted */
+    while( ( type=DatumGetInt32(FunctionCall3(
+            &(prsobj->getlexeme_info),
+            PointerGetDatum(prsobj->prs),
+            PointerGetDatum(&lemm),
+            PointerGetDatum(&lenlemm))) ) != 0 ) {
+
+        if ( lenlemm >= MAXSTRLEN )
+            elog(ERROR, "Word is too long");
+
+        hladdword(prs,lemm,lenlemm,type);
+
+        if ( type >= cfg->len )
+            continue;
+
+        for(i=0;i<cfg->map[type].len;i++) {
+            DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
+            char **norms, **ptr;
+
+            /* lenlemm is an integer, so it travels as an int32 Datum */
+            norms = ptr = (char**)DatumGetPointer(
+                FunctionCall3(
+                    &(dict->lexize_info),
+                    PointerGetDatum(dict->dictionary),
+                    PointerGetDatum(lemm),
+                    Int32GetDatum(lenlemm)
+                )
+            );
+            if ( !norms ) /* dictionary doesn't know this lexem */
+                continue;
+
+            while( *ptr ) {
+                hlfinditem(prs,query,*ptr,strlen(*ptr));
+                pfree(*ptr);
+                ptr++;
+            }
+            pfree(norms);
+            break; /* lexem already normalized or is stop word*/
+        }
+    }
+
+    FunctionCall1(
+        &(prsobj->end_info),
+        PointerGetDatum(prsobj->prs)
+    );
+}
+
+/*
+ * genhl: build the headline text from the words collected in prs.
+ *
+ * Words with "in" set and neither "skip" nor "repeated" set are emitted:
+ * a word flagged "replace" becomes a single space, any other word is
+ * copied verbatim and, when "selected", wrapped in prs->startsel /
+ * prs->stopsel.  The word buffers of all non-repeated entries are freed
+ * along the way.  Returns a palloc'ed text value.
+ */
+text*
+genhl(HLPRSTEXT * prs) {
+    int alloclen=128;
+    text *out=(text*)palloc( alloclen );
+    char *cur=((char*)out) + VARHDRSZ;
+    HLWORD *wrd;
+
+    for( wrd=prs->words; wrd - prs->words < prs->curwords; wrd++ ) {
+        /* make sure the word plus both markers fit, doubling as needed */
+        while ( wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char*)out)) >= alloclen ) {
+            int used = cur - ((char*)out);
+            alloclen*= 2;
+            out = (text *) repalloc(out, alloclen);
+            cur=((char*)out) + used;
+        }
+
+        if ( wrd->in && !wrd->skip && !wrd->repeated ) {
+            if ( wrd->replace ) {
+                *cur++=' ';
+            } else {
+                if (wrd->selected) {
+                    memcpy(cur,prs->startsel,prs->startsellen);
+                    cur+=prs->startsellen;
+                }
+
+                memcpy(cur,wrd->word,wrd->len);
+                cur+=wrd->len;
+
+                if (wrd->selected) {
+                    memcpy(cur,prs->stopsel,prs->stopsellen);
+                    cur+=prs->stopsellen;
+                }
+            }
+        }
+
+        /* repeated entries share the buffer of the entry they copy */
+        if ( !wrd->repeated )
+            pfree(wrd->word);
+    }
+
+    VARATT_SIZEP(out)=cur - ((char*)out);
+    return out;
+}
+
+/*
+ * get_currcfg: return the oid of the current configuration.
+ *
+ * If none has been chosen yet, the configuration whose pg_ts_cfg.locale
+ * equals the current LC_CTYPE locale is looked up through SPI and cached
+ * in current_cfg_id.  Raises an error if the query fails or finds no row.
+ */
+int
+get_currcfg(void) {
+    Oid argtypes[1]={ TEXTOID };
+    Datum values[1];
+    const char *localename;
+    bool isnull;
+    int rc;
+
+    /* already known */
+    if ( current_cfg_id > 0 )
+        return current_cfg_id;
+
+    SPI_connect();
+    if ( plan_getcfg_bylocale == NULL ) {
+        plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
+        if ( plan_getcfg_bylocale == NULL )
+            elog(ERROR, "SPI_prepare() failed");
+    }
+
+    /* a NULL second argument only queries the locale */
+    localename = setlocale(LC_CTYPE, NULL);
+    values[0] = PointerGetDatum( char2text((char*)localename) );
+
+    rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
+    if ( rc < 0 )
+        elog (ERROR, "SPI_execp return %d", rc);
+    if ( !(SPI_processed > 0) )
+        elog(ERROR,"Can't find tsearch config by locale");
+
+    current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
+
+    pfree(DatumGetPointer(values[0]));
+    SPI_finish();
+    return current_cfg_id;
+}
+
+PG_FUNCTION_INFO_V1(set_curcfg);
+Datum set_curcfg(PG_FUNCTION_ARGS);
+
+/*
+ * set_curcfg(oid): make the given configuration the current one.
+ * findcfg() is called first, so the configuration is loaded into the
+ * cache before the id is recorded.
+ */
+Datum
+set_curcfg(PG_FUNCTION_ARGS) {
+    Oid cfgid;
+
+    findcfg(PG_GETARG_OID(0));
+    cfgid=PG_GETARG_OID(0);
+    current_cfg_id=cfgid;
+
+    PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(set_curcfg_byname);
+Datum set_curcfg_byname(PG_FUNCTION_ARGS);
+
+/*
+ * set_curcfg_byname(text): resolve the configuration name to its oid with
+ * name2id_cfg() and delegate to set_curcfg().
+ */
+Datum
+set_curcfg_byname(PG_FUNCTION_ARGS) {
+    text *name=PG_GETARG_TEXT_P(0);
+    Oid cfgid=name2id_cfg(name);
+
+    DirectFunctionCall1( set_curcfg, ObjectIdGetDatum(cfgid) );
+
+    PG_FREE_IF_COPY(name, 0);
+    PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(show_curcfg);
+Datum show_curcfg(PG_FUNCTION_ARGS);
+
+/*
+ * show_curcfg(): SQL-callable wrapper returning the value of
+ * get_currcfg() as an oid.
+ */
+Datum
+show_curcfg(PG_FUNCTION_ARGS) {
+    int cfgid=get_currcfg();
+
+    PG_RETURN_OID( cfgid );
+}
+
+PG_FUNCTION_INFO_V1(reset_tsearch);
+Datum reset_tsearch(PG_FUNCTION_ARGS);
+/*
+ * reset_tsearch(): SQL-callable function that only issues a NOTICE through
+ * ts_error().
+ * NOTE(review): nothing is reset in this function itself; presumably
+ * ts_error() flushes the tsearch caches as a side effect -- confirm
+ * against its definition.
+ */
+Datum
+reset_tsearch(PG_FUNCTION_ARGS) {
+ ts_error(NOTICE,"TSearch cache cleaned");
+ PG_RETURN_VOID();
+}