diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2003-07-21 10:27:44 +0000 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2003-07-21 10:27:44 +0000 |
commit | b88605337ea37ce1080a696937540cf8b48338d1 (patch) | |
tree | ed26ee96ed8ca176fca8a956e938b3cc7adcb1f3 /contrib/tsearch2/tsvector.c | |
parent | a6053826703e288ccc7bd2a9a86a278cef6bc7ed (diff) | |
download | postgresql-b88605337ea37ce1080a696937540cf8b48338d1.tar.gz postgresql-b88605337ea37ce1080a696937540cf8b48338d1.zip |
tsearch2 module
Diffstat (limited to 'contrib/tsearch2/tsvector.c')
-rw-r--r-- | contrib/tsearch2/tsvector.c | 804 |
1 files changed, 804 insertions, 0 deletions
diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c new file mode 100644 index 00000000000..ff0794da009 --- /dev/null +++ b/contrib/tsearch2/tsvector.c @@ -0,0 +1,804 @@ +/* + * In/Out definitions for tsvector type + * Internal structure: + * string of values, array of position lexem in string and it's length + * Teodor Sigaev <teodor@sigaev.ru> + */ +#include "postgres.h" + +#include "access/gist.h" +#include "access/itup.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" +#include "executor/spi.h" +#include "commands/trigger.h" +#include "nodes/pg_list.h" +#include "catalog/namespace.h" + +#include "utils/pg_locale.h" + +#include <ctype.h> /* tolower */ +#include "tsvector.h" +#include "query.h" +#include "ts_cfg.h" +#include "common.h" + +PG_FUNCTION_INFO_V1(tsvector_in); +Datum tsvector_in(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsvector_out); +Datum tsvector_out(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(to_tsvector); +Datum to_tsvector(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(to_tsvector_current); +Datum to_tsvector_current(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(to_tsvector_name); +Datum to_tsvector_name(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsearch2); +Datum tsearch2(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsvector_length); +Datum tsvector_length(PG_FUNCTION_ARGS); + +/* + * in/out text index type + */ +static int +comparePos(const void *a, const void *b) { + if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos ) + return 1; + return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1; +} + +static int +uniquePos(WordEntryPos *a, int4 l) { + WordEntryPos *ptr, *res; + + res=a; + if (l==1) + return l; + + qsort((void *) a, l, sizeof(WordEntryPos), comparePos); + + ptr = a + 1; + while (ptr - a < l) { + if ( ptr->pos != res->pos ) { + res++; + res->pos = ptr->pos; + res->weight = ptr->weight; + if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 ) + break; + } else if ( ptr->weight > res->weight ) + res->weight = ptr->weight; + ptr++; + } + return res + 1 - a; +} + +static char *BufferStr; +static int +compareentry(const void *a, const void *b) +{ + if ( ((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len) + { + return strncmp( + &BufferStr[((WordEntryIN *) a)->entry.pos], + &BufferStr[((WordEntryIN *) b)->entry.pos], + ((WordEntryIN *) a)->entry.len); + } + return ( ((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len ) ? 1 : -1; +} + +static int +uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) +{ + WordEntryIN *ptr, + *res; + + res = a; + if (l == 1) { + if ( a->entry.haspos ) { + *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos)); + *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos); + } + return l; + } + + ptr = a + 1; + BufferStr = buf; + qsort((void *) a, l, sizeof(WordEntryIN), compareentry); + + while (ptr - a < l) + { + if (!(ptr->entry.len == res->entry.len && + strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0)) + { + if ( res->entry.haspos ) { + *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos)); + *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos); + } + *outbuflen += SHORTALIGN(res->entry.len); + res++; + memcpy(res,ptr,sizeof(WordEntryIN)); + } else if ( ptr->entry.haspos ){ + if ( res->entry.haspos ) { + int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos); + res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos)); + memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), + &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos)); + *(uint16*)(res->pos) += *(uint16*)(ptr->pos); + pfree( ptr->pos ); + } else { + res->entry.haspos=1; + res->pos = ptr->pos; + } + } + ptr++; + } + if ( res->entry.haspos ) { + *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos)); + *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos); + } + *outbuflen += SHORTALIGN(res->entry.len); + + return res + 1 - a; +} + +#define WAITWORD 1 +#define WAITENDWORD 2 +#define WAITNEXTCHAR 3 +#define WAITENDCMPLX 4 +#define WAITPOSINFO 5 +#define INPOSINFO 6 +#define WAITPOSDELIM 7 + +#define RESIZEPRSBUF \ +do { \ + if ( state->curpos - state->word + 1 >= state->len ) \ + { \ + int4 clen = state->curpos - state->word; \ + state->len *= 2; \ + state->word = (char*)repalloc( (void*)state->word, state->len ); \ + state->curpos = state->word + clen; \ + } \ +} while (0) + +int4 +gettoken_tsvector(TI_IN_STATE * state) +{ + int4 oldstate = 0; + + state->curpos = state->word; + state->state = WAITWORD; + state->alen=0; + + while (1) + { + if (state->state == WAITWORD) + { + if (*(state->prsbuf) == '\0') + return 0; + else if (*(state->prsbuf) == '\'') + state->state = WAITENDCMPLX; + else if (*(state->prsbuf) == '\\') + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDWORD; + } + else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf))) + elog(ERROR, "Syntax error"); + else if (*(state->prsbuf) != ' ') + { + *(state->curpos) = *(state->prsbuf); + state->curpos++; + state->state = WAITENDWORD; + } + } + else if (state->state == WAITNEXTCHAR) + { + if (*(state->prsbuf) == '\0') + elog(ERROR, "There is no escaped character"); + else + { + RESIZEPRSBUF; + *(state->curpos) = *(state->prsbuf); + state->curpos++; + state->state = oldstate; + } + } + else if (state->state == WAITENDWORD) + { + if (*(state->prsbuf) == '\\') + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDWORD; + } + else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' || + (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))) + { + RESIZEPRSBUF; + if (state->curpos == state->word) + elog(ERROR, "Syntax error"); + *(state->curpos) = '\0'; + return 1; + } else if ( *(state->prsbuf) == ':' ) { + if (state->curpos == state->word) + elog(ERROR, "Syntax error"); + *(state->curpos) = '\0'; + if ( state->oprisdelim ) + return 1; + else + state->state = INPOSINFO; + } + else + { + RESIZEPRSBUF; + *(state->curpos) = *(state->prsbuf); + state->curpos++; + } + } + else if (state->state == WAITENDCMPLX) + { + if (*(state->prsbuf) == '\'') + { + RESIZEPRSBUF; + *(state->curpos) = '\0'; + if (state->curpos == state->word) + elog(ERROR, "Syntax error"); + if ( state->oprisdelim ) { + state->prsbuf++; + return 1; + } else + state->state = WAITPOSINFO; + } + else if (*(state->prsbuf) == '\\') + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDCMPLX; + } + else if (*(state->prsbuf) == '\0') + elog(ERROR, "Syntax error"); + else + { + RESIZEPRSBUF; + *(state->curpos) = *(state->prsbuf); + state->curpos++; + } + } else if (state->state == WAITPOSINFO) { + if ( *(state->prsbuf) == ':' ) + state->state=INPOSINFO; + else + return 1; + } else if (state->state == INPOSINFO) { + if ( isdigit(*(state->prsbuf)) ) { + if ( state->alen==0 ) { + state->alen=4; + state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen ); + *(uint16*)(state->pos)=0; + } else if ( *(uint16*)(state->pos) +1 >= state->alen ) { + state->alen *= 2; + state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen ); + } + ( *(uint16*)(state->pos) )++; + state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf)); + if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 ) + elog(ERROR,"Wrong position info"); + state->pos[ *(uint16*)(state->pos) ].weight = 0; + state->state = WAITPOSDELIM; + } else + elog(ERROR,"Syntax error"); + } else if (state->state == WAITPOSDELIM) { + if ( *(state->prsbuf) == ',' ) { + state->state = INPOSINFO; + } else if ( tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 3; + } else if ( tolower(*(state->prsbuf)) == 'b' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 2; + } else if ( tolower(*(state->prsbuf)) == 'c' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 1; + } else if ( tolower(*(state->prsbuf)) == 'd' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 0; + } else if ( isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0' ) { + return 1; + } else if ( !isdigit(*(state->prsbuf)) ) + elog(ERROR,"Syntax error"); + } else + elog(ERROR, "Inner bug :("); + state->prsbuf++; + } + + return 0; +} + +Datum +tsvector_in(PG_FUNCTION_ARGS) +{ + char *buf = PG_GETARG_CSTRING(0); + TI_IN_STATE state; + WordEntryIN *arr; + WordEntry *inarr; + int4 len = 0, + totallen = 64; + tsvector *in; + char *tmpbuf, + *cur; + int4 i, + buflen = 256; + + state.prsbuf = buf; + state.len = 32; + state.word = (char *) palloc(state.len); + state.oprisdelim = false; + + arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); + cur = tmpbuf = (char *) palloc(buflen); + while (gettoken_tsvector(&state)) + { + if (len >= totallen) + { + totallen *= 2; + arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); + } + while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) + { + int4 dist = cur - tmpbuf; + + buflen *= 2; + tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); + cur = tmpbuf + dist; + } + if (state.curpos - state.word >= MAXSTRLEN) + elog(ERROR, "Word is too long"); + arr[len].entry.len= state.curpos - state.word; + if (cur - tmpbuf > MAXSTRPOS) + elog(ERROR, "Too long value"); + arr[len].entry.pos=cur - tmpbuf; + memcpy((void *) cur, (void *) state.word, arr[len].entry.len); + cur += arr[len].entry.len; + if ( state.alen ) { + arr[len].entry.haspos=1; + arr[len].pos = state.pos; + } else + arr[len].entry.haspos=0; + len++; + } + pfree(state.word); + + if ( len > 0 ) + len = uniqueentry(arr, len, tmpbuf, &buflen); + totallen = CALCDATASIZE(len, buflen); + in = (tsvector *) palloc(totallen); + memset(in,0,totallen); + in->len = totallen; + in->size = len; + cur = STRPTR(in); + inarr = ARRPTR(in); + for (i = 0; i < len; i++) + { + memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); + arr[i].entry.pos=cur - STRPTR(in); + cur += SHORTALIGN(arr[i].entry.len); + if ( arr[i].entry.haspos ) { + memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos)); + cur += (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos); + pfree( arr[i].pos ); + } + memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) ); + } + pfree(tmpbuf); + pfree(arr); + PG_RETURN_POINTER(in); +} + +Datum +tsvector_length(PG_FUNCTION_ARGS) +{ + tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + int4 ret = in->size; + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_INT32(ret); +} + +Datum +tsvector_out(PG_FUNCTION_ARGS) +{ + tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + char *outbuf; + int4 i, + j, + lenbuf = 0, pp; + WordEntry *ptr = ARRPTR(out); + char *curin, + *curout; + + lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/; + for (i = 0; i < out->size; i++) { + lenbuf += ptr[i].len*2 /*for escape */; + if ( ptr[i].haspos ) + lenbuf += 7*POSDATALEN(out, &(ptr[i])); + } + + curout = outbuf = (char *) palloc(lenbuf); + for (i = 0; i < out->size; i++) + { + curin = STRPTR(out)+ptr->pos; + if (i != 0) + *curout++ = ' '; + *curout++ = '\''; + j = ptr->len; + while (j--) + { + if (*curin == '\'') + { + int4 pos = curout - outbuf; + + outbuf = (char *) repalloc((void *) outbuf, ++lenbuf); + curout = outbuf + pos; + *curout++ = '\\'; + } + *curout++ = *curin++; + } + *curout++ = '\''; + if ( (pp=POSDATALEN(out,ptr)) != 0 ) { + WordEntryPos *wptr; + *curout++ = ':'; + wptr=POSDATAPTR(out,ptr); + while(pp) { + sprintf(curout,"%d",wptr->pos); + curout=strchr(curout,'\0'); + switch( wptr->weight ) { + case 3: *curout++ = 'A'; break; + case 2: *curout++ = 'B'; break; + case 1: *curout++ = 'C'; break; + case 0: + default: break; + } + if ( pp>1 ) *curout++ = ','; + pp--; wptr++; + } + } + ptr++; + } + *curout='\0'; + outbuf[lenbuf - 1] = '\0'; + PG_FREE_IF_COPY(out, 0); + PG_RETURN_POINTER(outbuf); +} + +static int +compareWORD(const void *a, const void *b) +{ + if (((WORD *) a)->len == ((WORD *) b)->len) { + int res = strncmp( + ((WORD *) a)->word, + ((WORD *) b)->word, + ((WORD *) b)->len); + if ( res==0 ) + return ( ((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos ) ? 1 : -1; + return res; + } + return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1; +} + +static int +uniqueWORD(WORD * a, int4 l) +{ + WORD *ptr, + *res; + int tmppos; + + if (l == 1) { + tmppos=LIMITPOS(a->pos.pos); + a->alen=2; + a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen ); + a->pos.apos[0]=1; + a->pos.apos[1]=tmppos; + return l; + } + + res = a; + ptr = a + 1; + + qsort((void *) a, l, sizeof(WORD), compareWORD); + tmppos=LIMITPOS(a->pos.pos); + a->alen=2; + a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen ); + a->pos.apos[0]=1; + a->pos.apos[1]=tmppos; + + while (ptr - a < l) + { + if (!(ptr->len == res->len && + strncmp(ptr->word, res->word, res->len) == 0)) + { + res++; + res->len = ptr->len; + res->word = ptr->word; + tmppos=LIMITPOS(ptr->pos.pos); + res->alen=2; + res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen ); + res->pos.apos[0]=1; + res->pos.apos[1]=tmppos; + } else { + pfree(ptr->word); + if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) { + if ( res->pos.apos[0]+1 >= res->alen ) { + res->alen*=2; + res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen ); + } + res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos); + res->pos.apos[0]++; + } + } + ptr++; + } + + return res + 1 - a; +} + +/* + * make value of tsvector + */ +static tsvector * +makevalue(PRSTEXT * prs) +{ + int4 i,j, + lenstr = 0, + totallen; + tsvector *in; + WordEntry *ptr; + char *str, + *cur; + + prs->curwords = uniqueWORD(prs->words, prs->curwords); + for (i = 0; i < prs->curwords; i++) { + lenstr += SHORTALIGN(prs->words[i].len); + + if ( prs->words[i].alen ) + lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); + } + + totallen = CALCDATASIZE(prs->curwords, lenstr); + in = (tsvector *) palloc(totallen); + memset(in,0,totallen); + in->len = totallen; + in->size = prs->curwords; + + ptr = ARRPTR(in); + cur = str = STRPTR(in); + for (i = 0; i < prs->curwords; i++) + { + ptr->len = prs->words[i].len; + if (cur - str > MAXSTRPOS) + elog(ERROR, "Value is too big"); + ptr->pos= cur - str; + memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len); + pfree(prs->words[i].word); + cur += SHORTALIGN(prs->words[i].len); + if ( prs->words[i].alen ) { + WordEntryPos *wptr; + + ptr->haspos=1; + *(uint16*)cur = prs->words[i].pos.apos[0]; + wptr=POSDATAPTR(in,ptr); + for(j=0;j<*(uint16*)cur;j++) { + wptr[j].weight=0; + wptr[j].pos=prs->words[i].pos.apos[j+1]; + } + cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); + pfree(prs->words[i].pos.apos); + } else + ptr->haspos=0; + ptr++; + } + pfree(prs->words); + return in; +} + + +Datum +to_tsvector(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(1); + PRSTEXT prs; + tsvector *out = NULL; + TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); + + prs.lenwords = 32; + prs.curwords = 0; + prs.pos = 0; + prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); + + parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); + PG_FREE_IF_COPY(in, 1); + + if (prs.curwords) + out = makevalue(&prs); + else { + pfree(prs.words); + out = palloc(CALCDATASIZE(0,0)); + out->len = CALCDATASIZE(0,0); + out->size = 0; + } + PG_RETURN_POINTER(out); +} + +Datum +to_tsvector_name(PG_FUNCTION_ARGS) { + text *cfg=PG_GETARG_TEXT_P(0); + Datum res = DirectFunctionCall3( + to_tsvector, + Int32GetDatum( name2id_cfg( cfg ) ), + PG_GETARG_DATUM(1), + (Datum)0 + ); + PG_FREE_IF_COPY(cfg,0); + PG_RETURN_DATUM(res); +} + +Datum +to_tsvector_current(PG_FUNCTION_ARGS) { + Datum res = DirectFunctionCall3( + to_tsvector, + Int32GetDatum( get_currcfg() ), + PG_GETARG_DATUM(0), + (Datum)0 + ); + PG_RETURN_DATUM(res); +} + +static Oid +findFunc(char *fname) { + FuncCandidateList clist,ptr; + Oid funcid = InvalidOid; + List *names=makeList1(makeString(fname)); + + ptr = clist = FuncnameGetCandidates(names, 1); + freeList(names); + + if ( !ptr ) + return funcid; + + while(ptr) { + if ( ptr->args[0] == TEXTOID && funcid == InvalidOid ) + funcid=ptr->oid; + clist=ptr->next; + pfree(ptr); + ptr=clist; + } + + return funcid; +} + +/* + * Trigger + */ +Datum +tsearch2(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata; + Trigger *trigger; + Relation rel; + HeapTuple rettuple = NULL; + TSCfgInfo *cfg=findcfg(get_currcfg()); + int numidxattr, + i; + PRSTEXT prs; + Datum datum = (Datum) 0; + Oid funcoid = InvalidOid; + + if (!CALLED_AS_TRIGGER(fcinfo)) + elog(ERROR, "TSearch: Not fired by trigger manager"); + + trigdata = (TriggerData *) fcinfo->context; + if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event)) + elog(ERROR, "TSearch: Can't process STATEMENT events"); + if (TRIGGER_FIRED_AFTER(trigdata->tg_event)) + elog(ERROR, "TSearch: Must be fired BEFORE event"); + + if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + rettuple = trigdata->tg_trigtuple; + else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + rettuple = trigdata->tg_newtuple; + else + elog(ERROR, "TSearch: Unknown event"); + + trigger = trigdata->tg_trigger; + rel = trigdata->tg_relation; + + if (trigger->tgnargs < 2) + elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)"); + + numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); + if (numidxattr == SPI_ERROR_NOATTRIBUTE) + elog(ERROR, "TSearch: Can not find tsvector_field"); + + prs.lenwords = 32; + prs.curwords = 0; + prs.pos = 0; + prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); + + /* find all words in indexable column */ + for (i = 1; i < trigger->tgnargs; i++) + { + int numattr; + Oid oidtype; + Datum txt_toasted; + bool isnull; + text *txt; + + numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); + if (numattr == SPI_ERROR_NOATTRIBUTE) + { + funcoid=findFunc(trigger->tgargs[i]); + if ( funcoid==InvalidOid ) + elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]); + continue; + } + oidtype = SPI_gettypeid(rel->rd_att, numattr); + /* We assume char() and varchar() are binary-equivalent to text */ + if (!(oidtype == TEXTOID || + oidtype == VARCHAROID || + oidtype == BPCHAROID)) + { + elog(WARNING, "TSearch: '%s' is not of character type", + trigger->tgargs[i]); + continue; + } + txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); + if (isnull) + continue; + + if ( funcoid!=InvalidOid ) { + text *txttmp = (text *) DatumGetPointer( OidFunctionCall1( + funcoid, + PointerGetDatum(txt_toasted) + )); + txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp))); + if ( txt == txttmp ) + txt_toasted = PointerGetDatum(txt); + } else + txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted))); + + parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); + if (txt != (text*)DatumGetPointer(txt_toasted) ) + pfree(txt); + } + + /* make tsvector value */ + if (prs.curwords) + { + datum = PointerGetDatum(makevalue(&prs)); + rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, + &datum, NULL); + pfree(DatumGetPointer(datum)); + } + else + { + tsvector *out = palloc(CALCDATASIZE(0,0)); + out->len = CALCDATASIZE(0,0); + out->size = 0; + datum = PointerGetDatum(out); + pfree(prs.words); + rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, + &datum, NULL); + } + + if (rettuple == NULL) + elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result); + + return PointerGetDatum(rettuple); +} |