diff options
Diffstat (limited to 'contrib/tsearch2/ts_cfg.c')
-rw-r--r-- | contrib/tsearch2/ts_cfg.c | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c new file mode 100644 index 00000000000..7c9f20c8de9 --- /dev/null +++ b/contrib/tsearch2/ts_cfg.c @@ -0,0 +1,509 @@ +/* + * interface functions to tscfg + * Teodor Sigaev <teodor@sigaev.ru> + */ +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <locale.h> + +#include "postgres.h" +#include "fmgr.h" +#include "utils/array.h" +#include "catalog/pg_type.h" +#include "executor/spi.h" + +#include "ts_cfg.h" +#include "dict.h" +#include "wparser.h" +#include "snmap.h" +#include "common.h" +#include "tsvector.h" + +/*********top interface**********/ + +static void *plan_getcfg_bylocale=NULL; +static void *plan_getcfg=NULL; +static void *plan_getmap=NULL; +static void *plan_name2id=NULL; +static Oid current_cfg_id=0; + +void +init_cfg(Oid id, TSCfgInfo *cfg) { + Oid arg[2]={ OIDOID, OIDOID }; + bool isnull; + Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ; + int stat,i,j; + text *ptr; + text *prsname=NULL; + MemoryContext oldcontext; + + memset(cfg,0,sizeof(TSCfgInfo)); + SPI_connect(); + if ( !plan_getcfg ) { + plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) ); + if ( !plan_getcfg ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_getcfg, pars, " ", 1); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) { + prsname = (text*) DatumGetPointer( + SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) + ); + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + prsname = ptextdup( prsname ); + MemoryContextSwitchTo(oldcontext); + + cfg->id=id; + } else + ts_error(ERROR, "No tsearch cfg with id %d", id); + + arg[0]=TEXTOID; + if ( !plan_getmap ) { + plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) ); + if ( !plan_getmap ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + pars[0]=PointerGetDatum( prsname ); + stat = SPI_execp(plan_getmap, pars, " ", 0); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed <= 0 ) + ts_error(ERROR, "No parser with id %d", id); + + for(i=0;i<SPI_processed;i++) { + int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull)); + ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull)); + ArrayType *a; + + if ( !cfg->map ) { + cfg->len=lexid+1; + cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len ); + if ( !cfg->map ) + ts_error(ERROR,"No memory"); + memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len ); + } + + if (isnull) + continue; + + a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) ); + + if ( ARR_NDIM(a) != 1 ) + ts_error(ERROR,"Wrong dimension"); + if ( ARRNELEMS(a) < 1 ) + continue; + + cfg->map[lexid].len=ARRNELEMS(a); + cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len ); + memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len ); + ptr=(text*)ARR_DATA_PTR(a); + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + for(j=0;j<cfg->map[lexid].len;j++) { + cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr)); + ptr=NEXTVAL(ptr); + } + MemoryContextSwitchTo(oldcontext); + + if ( a != toasted_a ) + pfree(a); + } + + SPI_finish(); + cfg->prs_id = name2id_prs( prsname ); + pfree(prsname); + for(i=0;i<cfg->len;i++) { + for(j=0;j<cfg->map[i].len;j++) { + ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] ); + cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) ); + pfree(ptr); + } + } +} + +typedef struct { + TSCfgInfo *last_cfg; + int len; + int reallen; + TSCfgInfo *list; + SNMap name2id_map; +} CFGList; + +static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}}; + +void +reset_cfg(void) { + freeSNMap( &(CList.name2id_map) ); + if ( CList.list ) { + int i,j; + for(i=0;i<CList.len;i++) + if ( CList.list[i].map ) { + for(j=0;j<CList.list[i].len;j++) + if ( CList.list[i].map[j].dict_id ) + free(CList.list[i].map[j].dict_id); + free( CList.list[i].map ); + } + free(CList.list); + } + memset(&CList,0,sizeof(CFGList)); +} + +static int +comparecfg(const void *a, const void *b) { + return ((TSCfgInfo*)a)->id - ((TSCfgInfo*)b)->id; +} + +TSCfgInfo * +findcfg(Oid id) { + /* last used cfg */ + if ( CList.last_cfg && CList.last_cfg->id==id ) + return CList.last_cfg; + + /* already used cfg */ + if ( CList.len != 0 ) { + TSCfgInfo key; + key.id=id; + CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg); + if ( CList.last_cfg != NULL ) + return CList.last_cfg; + } + + /* last chance */ + if ( CList.len==CList.reallen ) { + TSCfgInfo *tmp; + int reallen = ( CList.reallen ) ? 2*CList.reallen : 16; + tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen); + if ( !tmp ) + ts_error(ERROR,"No memory"); + CList.reallen=reallen; + CList.list=tmp; + } + CList.last_cfg=&(CList.list[CList.len]); + init_cfg(id, CList.last_cfg); + CList.len++; + qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg); + return findcfg(id); /* qsort changed order!! */; +} + + +Oid +name2id_cfg(text *name) { + Oid arg[1]={ TEXTOID }; + bool isnull; + Datum pars[1]={ PointerGetDatum(name) }; + int stat; + Oid id=findSNMap_t( &(CList.name2id_map), name ); + + if ( id ) + return id; + + SPI_connect(); + if ( !plan_name2id ) { + plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) ); + if ( !plan_name2id ) + elog(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_name2id, pars, " ", 1); + if ( stat < 0 ) + elog (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) { + id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + if ( isnull ) + elog(ERROR, "Null id for tsearch config"); + } else + elog(ERROR, "No tsearch config"); + SPI_finish(); + addSNMap_t( &(CList.name2id_map), name, id ); + return id; +} + + +void +parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) { + int type, lenlemm, i; + char *lemm=NULL; + WParserInfo *prsobj = findprs(cfg->prs_id); + + prsobj->prs=(void*)DatumGetPointer( + FunctionCall2( + &(prsobj->start_info), + PointerGetDatum(buf), + Int32GetDatum(buflen) + ) + ); + + while( ( type=DatumGetInt32(FunctionCall3( + &(prsobj->getlexeme_info), + PointerGetDatum(prsobj->prs), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))) ) != 0 ) { + + if ( lenlemm >= MAXSTRLEN ) + elog(ERROR, "Word is too long"); + + + if ( type >= cfg->len ) /* skip this type of lexem */ + continue; + + for(i=0;i<cfg->map[type].len;i++) { + DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) ); + char **norms, **ptr; + + norms = ptr = (char**)DatumGetPointer( + FunctionCall3( + &(dict->lexize_info), + PointerGetDatum(dict->dictionary), + PointerGetDatum(lemm), + PointerGetDatum(lenlemm) + ) + ); + if ( !norms ) /* dictionary doesn't know this lexem */ + continue; + + prs->pos++; /*set pos*/ + + while( *ptr ) { + if (prs->curwords == prs->lenwords) { + prs->lenwords *= 2; + prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD)); + } + + prs->words[prs->curwords].len = strlen(*ptr); + prs->words[prs->curwords].word = *ptr; + prs->words[prs->curwords].alen = 0; + prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); + ptr++; + prs->curwords++; + } + pfree(norms); + break; /* lexem already normalized or is stop word*/ + } + } + + FunctionCall1( + &(prsobj->end_info), + PointerGetDatum(prsobj->prs) + ); +} + +static void +hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) { + while (prs->curwords >= prs->lenwords) { + prs->lenwords *= 2; + prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD)); + } + memset( &(prs->words[prs->curwords]), 0, sizeof(HLWORD) ); + prs->words[prs->curwords].type = (uint8)type; + prs->words[prs->curwords].len = buflen; + prs->words[prs->curwords].word = palloc(buflen); + memcpy(prs->words[prs->curwords].word, buf, buflen); + prs->curwords++; +} + +static void +hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) { + int i; + ITEM *item=GETQUERY(query); + HLWORD *word=&( prs->words[prs->curwords-1] ); + + while (prs->curwords + query->size >= prs->lenwords) { + prs->lenwords *= 2; + prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD)); + } + + for(i=0; i<query->size; i++) { + if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) { + if ( word->item ) { + memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) ); + prs->words[prs->curwords].item=item; + prs->words[prs->curwords].repeated=1; + prs->curwords++; + } else + word->item=item; + } + item++; + } +} + +void +hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) { + int type, lenlemm, i; + char *lemm=NULL; + WParserInfo *prsobj = findprs(cfg->prs_id); + + prsobj->prs=(void*)DatumGetPointer( + FunctionCall2( + &(prsobj->start_info), + PointerGetDatum(buf), + Int32GetDatum(buflen) + ) + ); + + while( ( type=DatumGetInt32(FunctionCall3( + &(prsobj->getlexeme_info), + PointerGetDatum(prsobj->prs), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))) ) != 0 ) { + + if ( lenlemm >= MAXSTRLEN ) + elog(ERROR, "Word is too long"); + + hladdword(prs,lemm,lenlemm,type); + + if ( type >= cfg->len ) + continue; + + for(i=0;i<cfg->map[type].len;i++) { + DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) ); + char **norms, **ptr; + + norms = ptr = (char**)DatumGetPointer( + FunctionCall3( + &(dict->lexize_info), + PointerGetDatum(dict->dictionary), + PointerGetDatum(lemm), + PointerGetDatum(lenlemm) + ) + ); + if ( !norms ) /* dictionary doesn't know this lexem */ + continue; + + while( *ptr ) { + hlfinditem(prs,query,*ptr,strlen(*ptr)); + pfree(*ptr); + ptr++; + } + pfree(norms); + break; /* lexem already normalized or is stop word*/ + } + } + + FunctionCall1( + &(prsobj->end_info), + PointerGetDatum(prsobj->prs) + ); +} + +text* +genhl(HLPRSTEXT * prs) { + text *out; + int len=128; + char *ptr; + HLWORD *wrd=prs->words; + + out = (text*)palloc( len ); + ptr=((char*)out) + VARHDRSZ; + + while( wrd - prs->words < prs->curwords ) { + while ( wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) { + int dist = ptr - ((char*)out); + len*= 2; + out = (text *) repalloc(out, len); + ptr=((char*)out) + dist; + } + + if ( wrd->in && !wrd->skip && !wrd->repeated ) { + if ( wrd->replace ) { + *ptr=' '; + ptr++; + } else { + if (wrd->selected) { + memcpy(ptr,prs->startsel,prs->startsellen); + ptr+=prs->startsellen; + } + memcpy(ptr,wrd->word,wrd->len); + ptr+=wrd->len; + if (wrd->selected) { + memcpy(ptr,prs->stopsel,prs->stopsellen); + ptr+=prs->stopsellen; + } + } + } + + if ( !wrd->repeated ) + pfree(wrd->word); + + wrd++; + } + + VARATT_SIZEP(out)=ptr - ((char*)out); + return out; +} + +int +get_currcfg(void) { + Oid arg[1]={ TEXTOID }; + const char *curlocale; + Datum pars[1]; + bool isnull; + int stat; + + if ( current_cfg_id > 0 ) + return current_cfg_id; + + SPI_connect(); + if ( !plan_getcfg_bylocale ) { + plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) ); + if ( !plan_getcfg_bylocale ) + elog(ERROR, "SPI_prepare() failed"); + } + + curlocale = setlocale(LC_CTYPE, NULL); + pars[0] = PointerGetDatum( char2text((char*)curlocale) ); + stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1); + + if ( stat < 0 ) + elog (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) + current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + else + elog(ERROR,"Can't find tsearch config by locale"); + + pfree(DatumGetPointer(pars[0])); + SPI_finish(); + return current_cfg_id; +} + +PG_FUNCTION_INFO_V1(set_curcfg); +Datum set_curcfg(PG_FUNCTION_ARGS); +Datum +set_curcfg(PG_FUNCTION_ARGS) { + findcfg(PG_GETARG_OID(0)); + current_cfg_id=PG_GETARG_OID(0); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(set_curcfg_byname); +Datum set_curcfg_byname(PG_FUNCTION_ARGS); +Datum +set_curcfg_byname(PG_FUNCTION_ARGS) { + text *name=PG_GETARG_TEXT_P(0); + + DirectFunctionCall1( + set_curcfg, + ObjectIdGetDatum( name2id_cfg(name) ) + ); + PG_FREE_IF_COPY(name, 0); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(show_curcfg); +Datum show_curcfg(PG_FUNCTION_ARGS); +Datum +show_curcfg(PG_FUNCTION_ARGS) { + PG_RETURN_OID( get_currcfg() ); +} + +PG_FUNCTION_INFO_V1(reset_tsearch); +Datum reset_tsearch(PG_FUNCTION_ARGS); +Datum +reset_tsearch(PG_FUNCTION_ARGS) { + ts_error(NOTICE,"TSearch cache cleaned"); + PG_RETURN_VOID(); +} |