diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2005-11-08 17:08:46 +0000 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2005-11-08 17:08:46 +0000 |
commit | 0645663e6cf20fee555ae0acf428b85fd20a5fed (patch) | |
tree | 767b927057309a95d153c2ec8b929ace5579f247 /contrib/tsearch2/query_gist.c | |
parent | 6521ea008e2c02c69c13e69c78383114a95caa9e (diff) | |
download | postgresql-0645663e6cf20fee555ae0acf428b85fd20a5fed.tar.gz postgresql-0645663e6cf20fee555ae0acf428b85fd20a5fed.zip |
New features for tsearch2:
1 Comparison operation for tsquery
2 Btree index on tsquery
3 numnode(tsquery) - returns 'length' of tsquery
4 tsquery @ tsquery, tsquery ~ tsquery - contains, contained for tsquery.
Note: They don't guarantee an exact result, only MAY BE, so they are
useful only for speeding up rewrite functions
5 GiST index support for @,~
6 rewrite():
select rewrite(orig, what, to);
select rewrite(ARRAY[orig, what, to]) from tsquery_table;
select rewrite(orig, 'select what, to from tsquery_table;');
7 significantly improve cover algorithm
Diffstat (limited to 'contrib/tsearch2/query_gist.c')
-rw-r--r-- | contrib/tsearch2/query_gist.c | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/contrib/tsearch2/query_gist.c b/contrib/tsearch2/query_gist.c new file mode 100644 index 00000000000..630d3e4e00f --- /dev/null +++ b/contrib/tsearch2/query_gist.c @@ -0,0 +1,324 @@ +#include "postgres.h" + +#include "storage/bufpage.h" +#include "access/skey.h" +#include "access/gist.h" + +#include "query.h" + +typedef uint64 TPQTGist; + +#define GETENTRY(vec,pos) ((TPQTGist *) DatumGetPointer((vec)->vector[(pos)].key)) + +PG_FUNCTION_INFO_V1(tsq_mcontains); +Datum tsq_mcontains(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsq_mcontained); +Datum tsq_mcontained(PG_FUNCTION_ARGS); + +static TPQTGist +makesign(QUERYTYPE* a) { + int i; + ITEM *ptr = GETQUERY(a); + TPQTGist sign = 0; + + for (i = 0; i < a->size; i++) { + if ( ptr->type == VAL ) + sign |= 1 << (ptr->val % 64); + ptr++; + } + + return sign; +} + +Datum +tsq_mcontains(PG_FUNCTION_ARGS) { + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + QUERYTYPE *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); + TPQTGist sq, se; + int i,j; + ITEM *iq, *ie; + + if ( query->size < ex->size ) { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( false ); + } + + sq = makesign(query); + se = makesign(ex); + + if ( (sq&se)!=se ) { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( false ); + } + + ie = GETQUERY(ex); + + for(i=0;i<ex->size;i++) { + iq = GETQUERY(query); + if ( ie[i].type != VAL ) + continue; + for(j=0;j<query->size;j++) + if ( iq[j].type == VAL && ie[i].val == iq[j].val ) { + j = query->size+1; + break; + } + if ( j == query->size ) { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( false ); + } + } + + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( true ); +} + +Datum +tsq_mcontained(PG_FUNCTION_ARGS) { + PG_RETURN_DATUM( + DirectFunctionCall2( + tsq_mcontains, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0) + ) + ); +} + 
+PG_FUNCTION_INFO_V1(gtsq_in); +Datum gtsq_in(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_out); +Datum gtsq_out(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_compress); +Datum gtsq_compress(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_decompress); +Datum gtsq_decompress(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_consistent); +Datum gtsq_consistent(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_union); +Datum gtsq_union(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_same); +Datum gtsq_same(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_penalty); +Datum gtsq_penalty(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_picksplit); +Datum gtsq_picksplit(PG_FUNCTION_ARGS); + + +Datum +gtsq_in(PG_FUNCTION_ARGS) { + elog(ERROR, "Not implemented"); + PG_RETURN_DATUM(0); +} + +Datum +gtsq_out(PG_FUNCTION_ARGS) { + elog(ERROR, "Not implemented"); + PG_RETURN_DATUM(0); +} + +Datum +gtsq_compress(PG_FUNCTION_ARGS) { + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval = entry; + + if (entry->leafkey) { + TPQTGist *sign = (TPQTGist*)palloc( sizeof(TPQTGist) ); + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + *sign = makesign( (QUERYTYPE*)DatumGetPointer(PG_DETOAST_DATUM(entry->key)) ); + + gistentryinit(*retval, PointerGetDatum(sign), + entry->rel, entry->page, + entry->offset, sizeof(TPQTGist), FALSE); + } + + PG_RETURN_POINTER(retval); +} + +Datum +gtsq_decompress(PG_FUNCTION_ARGS) { + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); +} + +Datum +gtsq_consistent(PG_FUNCTION_ARGS) { + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + TPQTGist *key = (TPQTGist*) DatumGetPointer(entry->key); + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + TPQTGist sq = makesign(query); + + if ( GIST_LEAF(entry) ) + PG_RETURN_BOOL( ( (*key) & sq ) == ((strategy==1) ? 
sq : *key) ); + else + PG_RETURN_BOOL( (*key) & sq ); +} + +Datum +gtsq_union(PG_FUNCTION_ARGS) { + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + TPQTGist *sign = (TPQTGist*)palloc( sizeof(TPQTGist) ); + int i; + int *size = (int *) PG_GETARG_POINTER(1); + + memset( sign, 0, sizeof(TPQTGist) ); + + for (i = 0; i < entryvec->n;i++) + *sign |= *GETENTRY(entryvec, i); + + *size = sizeof(TPQTGist); + + PG_RETURN_POINTER(sign); +} + +Datum +gtsq_same(PG_FUNCTION_ARGS) { + TPQTGist *a = (TPQTGist *) PG_GETARG_POINTER(0); + TPQTGist *b = (TPQTGist *) PG_GETARG_POINTER(1); + + PG_RETURN_POINTER( *a == *b ); +} + +static int +sizebitvec(TPQTGist sign) { + int size=0,i; + + for(i=0;i<64;i++) + size += 0x01 & (sign>>i); +} + +static int +hemdist(TPQTGist a, TPQTGist b) { + TPQTGist res = a ^ b; + + return sizebitvec(res); +} + +Datum +gtsq_penalty(PG_FUNCTION_ARGS) { + TPQTGist *origval = (TPQTGist*) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key); + TPQTGist *newval = (TPQTGist*) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key); + float *penalty = (float *) PG_GETARG_POINTER(2); + + *penalty = hemdist(*origval, *newval); + + PG_RETURN_POINTER(penalty); +} + + +typedef struct { + OffsetNumber pos; + int4 cost; +} SPLITCOST; + +static int +comparecost(const void *a, const void *b) { + if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) + return 0; + else + return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 
1 : -1; +} + +#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) + +Datum +gtsq_picksplit(PG_FUNCTION_ARGS) { + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + OffsetNumber maxoff = entryvec->n - 2; + OffsetNumber k,j; + + TPQTGist *datum_l, *datum_r; + int4 size_alpha, size_beta; + int4 size_waste, waste = -1; + int4 nbytes; + OffsetNumber seed_1 = 0, seed_2 = 0; + OffsetNumber *left, *right; + + SPLITCOST *costvector; + + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + left = v->spl_left = (OffsetNumber *) palloc(nbytes); + right = v->spl_right = (OffsetNumber *) palloc(nbytes); + v->spl_nleft = v->spl_nright = 0; + + for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) + for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) { + size_waste = hemdist( *GETENTRY(entryvec,j), *GETENTRY(entryvec,k) ); + if (size_waste > waste) { + waste = size_waste; + seed_1 = k; + seed_2 = j; + } + } + + + if (seed_1 == 0 || seed_2 == 0) { + seed_1 = 1; + seed_2 = 2; + } + + datum_l = (TPQTGist*)palloc( sizeof(TPQTGist) ); + *datum_l=*GETENTRY(entryvec,seed_1); + datum_r = (TPQTGist*)palloc( sizeof(TPQTGist) ); + *datum_r=*GETENTRY(entryvec,seed_2); + + + maxoff = OffsetNumberNext(maxoff); + costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); + for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) { + costvector[j - 1].pos = j; + size_alpha = hemdist( *GETENTRY(entryvec,seed_1), *GETENTRY(entryvec,j) ); + size_beta = hemdist( *GETENTRY(entryvec,seed_2), *GETENTRY(entryvec,j) ); + costvector[j - 1].cost = abs(size_alpha - size_beta); + } + qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); + + for (k = 0; k < maxoff; k++) { + j = costvector[k].pos; + if ( j == seed_1 ) { + *left++ = j; + v->spl_nleft++; + continue; + } else if ( j == seed_2 ) { + *right++ = j; + v->spl_nright++; + continue; + } + size_alpha 
= hemdist( *datum_l, *GETENTRY(entryvec,j) ); + size_beta = hemdist( *datum_r, *GETENTRY(entryvec,j) ); + + if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) { + *datum_l |= *GETENTRY(entryvec,j); + *left++ = j; + v->spl_nleft++; + } else { + *datum_r |= *GETENTRY(entryvec,j); + *right++ = j; + v->spl_nright++; + } + } + + *right = *left = FirstOffsetNumber; + v->spl_ldatum = PointerGetDatum(datum_l); + v->spl_rdatum = PointerGetDatum(datum_r); + + PG_RETURN_POINTER(v); +} + + |