diff options
Diffstat (limited to 'src/backend/utils/adt/tsgistidx.c')
-rw-r--r-- | src/backend/utils/adt/tsgistidx.c | 274 |
1 files changed, 153 insertions, 121 deletions
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index 8320dcda800..2e0bc3ebd07 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -16,6 +16,7 @@ #include "access/gist.h" #include "access/heaptoast.h" +#include "access/reloptions.h" #include "lib/qunique.h" #include "port/pg_bitutils.h" #include "tsearch/ts_utils.h" @@ -23,17 +24,25 @@ #include "utils/pg_crc.h" -#define SIGLENINT 31 /* >121 => key will toast, so it will not work - * !!! */ +/* tsvector_ops opclass options */ +typedef struct +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int siglen; /* signature length */ +} GistTsVectorOptions; -#define SIGLEN ( sizeof(int32) * SIGLENINT ) -#define SIGLENBIT (SIGLEN * BITS_PER_BYTE) +#define SIGLEN_DEFAULT (31 * 4) +#define SIGLEN_MAX GISTMaxIndexKeySize +#define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \ + ((GistTsVectorOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \ + SIGLEN_DEFAULT) + +#define SIGLENBIT(siglen) ((siglen) * BITS_PER_BYTE) -typedef char BITVEC[SIGLEN]; typedef char *BITVECP; -#define LOOPBYTE \ - for(i=0;i<SIGLEN;i++) +#define LOOPBYTE(siglen) \ + for (i = 0; i < siglen; i++) #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) ) #define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 ) @@ -41,8 +50,8 @@ typedef char *BITVECP; #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) ) #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 ) -#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) -#define HASH(sign, val) SETBIT((sign), HASHVAL(val)) +#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen)) +#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen)) #define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key)) @@ -66,13 +75,14 @@ typedef struct #define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE ) #define GTHDRSIZE ( VARHDRSZ + sizeof(int32) ) -#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int32)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) +#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int32)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) ) #define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) ) +#define GETSIGLEN(x)( VARSIZE(x) - GTHDRSIZE ) #define GETARR(x) ( (int32*)( (char*)(x)+GTHDRSIZE ) ) #define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) ) -static int32 sizebitvec(BITVECP sign); +static int32 sizebitvec(BITVECP sign, int siglen); Datum gtsvectorin(PG_FUNCTION_ARGS) @@ -103,9 +113,10 @@ gtsvectorout(PG_FUNCTION_ARGS) sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key)); else { - int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT : sizebitvec(GETSIGN(key)); + int siglen = GETSIGLEN(key); + int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT(siglen) : sizebitvec(GETSIGN(key), siglen); - sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue); + sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT(siglen) - cnttrue); } PG_FREE_IF_COPY(key, 0); @@ -124,36 +135,49 @@ compareint(const void *va, const void *vb) } static void -makesign(BITVECP sign, SignTSVector *a) +makesign(BITVECP sign, SignTSVector *a, int siglen) { int32 k, len = ARRNELEM(a); int32 *ptr = GETARR(a); - MemSet((void *) sign, 0, sizeof(BITVEC)); + MemSet((void *) sign, 0, siglen); for (k = 0; k < len; k++) - HASH(sign, ptr[k]); + HASH(sign, ptr[k], siglen); } +static SignTSVector * +gtsvector_alloc(int flag, int len, BITVECP sign) +{ + int size = CALCGTSIZE(flag, len); + SignTSVector *res = palloc(size); + + SET_VARSIZE(res, size); + res->flag = flag; + + if ((flag & (SIGNKEY | ALLISTRUE)) == SIGNKEY && sign) + memcpy(GETSIGN(res), sign, len); + + return res; +} + + Datum gtsvector_compress(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + int siglen = GET_SIGLEN(); GISTENTRY *retval = entry; if (entry->leafkey) { /* tsvector */ - SignTSVector *res; TSVector val = DatumGetTSVector(entry->key); + SignTSVector *res = gtsvector_alloc(ARRKEY, val->size, NULL); int32 len; int32 *arr; WordEntry *ptr = ARRPTR(val); char *words = STRPTR(val); - len = CALCGTSIZE(ARRKEY, val->size); - res = (SignTSVector *) palloc(len); - SET_VARSIZE(res, len); - res->flag = ARRKEY; arr = GETARR(res); len = val->size; while (len--) @@ -185,13 +209,9 @@ gtsvector_compress(PG_FUNCTION_ARGS) /* make signature, if array is too long */ if (VARSIZE(res) > TOAST_INDEX_TARGET) { - SignTSVector *ressign; + SignTSVector *ressign = gtsvector_alloc(SIGNKEY, siglen, NULL); - len = CALCGTSIZE(SIGNKEY, 0); - ressign = (SignTSVector *) palloc(len); - SET_VARSIZE(ressign, len); - ressign->flag = SIGNKEY; - makesign(GETSIGN(ressign), res); + makesign(GETSIGN(ressign), res, siglen); res = ressign; } @@ -203,22 +223,17 @@ gtsvector_compress(PG_FUNCTION_ARGS) else if (ISSIGNKEY(DatumGetPointer(entry->key)) && !ISALLTRUE(DatumGetPointer(entry->key))) { - int32 i, - len; + int32 i; SignTSVector *res; BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); - LOOPBYTE + LOOPBYTE(siglen) { if ((sign[i] & 0xff) != 0xff) PG_RETURN_POINTER(retval); } - len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); - res = (SignTSVector *) palloc(len); - SET_VARSIZE(res, len); - res->flag = SIGNKEY | ALLISTRUE; - + res = gtsvector_alloc(SIGNKEY | ALLISTRUE, siglen, sign); retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, @@ -292,12 +307,14 @@ checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) static bool checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) { + void *key = (SignTSVector *) checkval; + /* * we are not able to find a prefix in signature tree */ if (val->prefix) return true; - return GETBIT(checkval, HASHVAL(val->valcrc)); + return GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key))); } Datum @@ -324,7 +341,7 @@ gtsvector_consistent(PG_FUNCTION_ARGS) /* since signature is lossy, cannot specify CALC_NOT here */ PG_RETURN_BOOL(TS_execute(GETQUERY(query), - (void *) GETSIGN(key), + key, TS_EXEC_PHRASE_NO_POS, checkcondition_bit)); } @@ -342,7 +359,7 @@ gtsvector_consistent(PG_FUNCTION_ARGS) } static int32 -unionkey(BITVECP sbase, SignTSVector *add) +unionkey(BITVECP sbase, SignTSVector *add, int siglen) { int32 i; @@ -353,7 +370,9 @@ unionkey(BITVECP sbase, SignTSVector *add) if (ISALLTRUE(add)) return 1; - LOOPBYTE + Assert(GETSIGLEN(add) == siglen); + + LOOPBYTE(siglen) sbase[i] |= sadd[i]; } else @@ -361,7 +380,7 @@ unionkey(BITVECP sbase, SignTSVector *add) int32 *ptr = GETARR(add); for (i = 0; i < ARRNELEM(add); i++) - HASH(sbase, ptr[i]); + HASH(sbase, ptr[i], siglen); } return 0; } @@ -372,30 +391,24 @@ gtsvector_union(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); int *size = (int *) PG_GETARG_POINTER(1); - BITVEC base; - int32 i, - len; - int32 flag = 0; - SignTSVector *result; + int siglen = GET_SIGLEN(); + SignTSVector *result = gtsvector_alloc(SIGNKEY, siglen, NULL); + BITVECP base = GETSIGN(result); + int32 i; + + memset(base, 0, siglen); - MemSet((void *) base, 0, sizeof(BITVEC)); for (i = 0; i < entryvec->n; i++) { - if (unionkey(base, GETENTRY(entryvec, i))) + if (unionkey(base, GETENTRY(entryvec, i), siglen)) { - flag = ALLISTRUE; + result->flag |= ALLISTRUE; + SET_VARSIZE(result, CALCGTSIZE(result->flag, siglen)); break; } } - flag |= SIGNKEY; - len = CALCGTSIZE(flag, 0); - result = (SignTSVector *) palloc(len); - *size = len; - SET_VARSIZE(result, len); - result->flag = flag; - if (!ISALLTRUE(result)) - memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC)); + *size = VARSIZE(result); PG_RETURN_POINTER(result); } @@ -406,6 +419,7 @@ gtsvector_same(PG_FUNCTION_ARGS) SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0); SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1); bool *result = (bool *) PG_GETARG_POINTER(2); + int siglen = GET_SIGLEN(); if (ISSIGNKEY(a)) { /* then b also ISSIGNKEY */ @@ -421,8 +435,10 @@ gtsvector_same(PG_FUNCTION_ARGS) BITVECP sa = GETSIGN(a), sb = GETSIGN(b); + Assert(GETSIGLEN(a) == siglen && GETSIGLEN(b) == siglen); + *result = true; - LOOPBYTE + LOOPBYTE(siglen) { if (sa[i] != sb[i]) { @@ -459,19 +475,19 @@ gtsvector_same(PG_FUNCTION_ARGS) } static int32 -sizebitvec(BITVECP sign) +sizebitvec(BITVECP sign, int siglen) { - return pg_popcount(sign, SIGLEN); + return pg_popcount(sign, siglen); } static int -hemdistsign(BITVECP a, BITVECP b) +hemdistsign(BITVECP a, BITVECP b, int siglen) { int i, diff, dist = 0; - LOOPBYTE + LOOPBYTE(siglen) { diff = (unsigned char) (a[i] ^ b[i]); /* Using the popcount functions here isn't likely to win */ @@ -483,17 +499,22 @@ hemdistsign(BITVECP a, BITVECP b) static int hemdist(SignTSVector *a, SignTSVector *b) { + int siglena = GETSIGLEN(a); + int siglenb = GETSIGLEN(b); + if (ISALLTRUE(a)) { if (ISALLTRUE(b)) return 0; else - return SIGLENBIT - sizebitvec(GETSIGN(b)); + return SIGLENBIT(siglenb) - sizebitvec(GETSIGN(b), siglenb); } else if (ISALLTRUE(b)) - return SIGLENBIT - sizebitvec(GETSIGN(a)); + return SIGLENBIT(siglena) - sizebitvec(GETSIGN(a), siglena); + + Assert(siglena == siglenb); - return hemdistsign(GETSIGN(a), GETSIGN(b)); + return hemdistsign(GETSIGN(a), GETSIGN(b), siglena); } Datum @@ -502,6 +523,7 @@ gtsvector_penalty(PG_FUNCTION_ARGS) GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); float *penalty = (float *) PG_GETARG_POINTER(2); + int siglen = GET_SIGLEN(); SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key); SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key); BITVECP orig = GETSIGN(origval); @@ -510,14 +532,22 @@ gtsvector_penalty(PG_FUNCTION_ARGS) if (ISARRKEY(newval)) { - BITVEC sign; + BITVECP sign = palloc(siglen); - makesign(sign, newval); + makesign(sign, newval, siglen); if (ISALLTRUE(origval)) - *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1); + { + int siglenbit = SIGLENBIT(siglen); + + *penalty = + (float) (siglenbit - sizebitvec(sign, siglen)) / + (float) (siglenbit + 1); + } else - *penalty = hemdistsign(sign, orig); + *penalty = hemdistsign(sign, orig, siglen); + + pfree(sign); } else *penalty = hemdist(origval, newval); @@ -527,19 +557,19 @@ gtsvector_penalty(PG_FUNCTION_ARGS) typedef struct { bool allistrue; - BITVEC sign; + BITVECP sign; } CACHESIGN; static void -fillcache(CACHESIGN *item, SignTSVector *key) +fillcache(CACHESIGN *item, SignTSVector *key, int siglen) { item->allistrue = false; if (ISARRKEY(key)) - makesign(item->sign, key); + makesign(item->sign, key, siglen); else if (ISALLTRUE(key)) item->allistrue = true; else - memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC)); + memcpy((void *) item->sign, (void *) GETSIGN(key), siglen); } #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) @@ -563,19 +593,19 @@ comparecost(const void *va, const void *vb) static int -hemdistcache(CACHESIGN *a, CACHESIGN *b) +hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen) { if (a->allistrue) { if (b->allistrue) return 0; else - return SIGLENBIT - sizebitvec(b->sign); + return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen); } else if (b->allistrue) - return SIGLENBIT - sizebitvec(a->sign); + return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen); - return hemdistsign(a->sign, b->sign); + return hemdistsign(a->sign, b->sign, siglen); } Datum @@ -583,6 +613,7 @@ gtsvector_picksplit(PG_FUNCTION_ARGS) { GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + int siglen = GET_SIGLEN(); OffsetNumber k, j; SignTSVector *datum_l, @@ -602,6 +633,7 @@ gtsvector_picksplit(PG_FUNCTION_ARGS) BITVECP ptr; int i; CACHESIGN *cache; + char *cache_sign; SPLITCOST *costvector; maxoff = entryvec->n - 2; @@ -610,16 +642,22 @@ gtsvector_picksplit(PG_FUNCTION_ARGS) v->spl_right = (OffsetNumber *) palloc(nbytes); cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); - fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber)); + cache_sign = palloc(siglen * (maxoff + 2)); + + for (j = 0; j < maxoff + 2; j++) + cache[j].sign = &cache_sign[siglen * j]; + + fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber), + siglen); for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) { for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) { if (k == FirstOffsetNumber) - fillcache(&cache[j], GETENTRY(entryvec, j)); + fillcache(&cache[j], GETENTRY(entryvec, j), siglen); - size_waste = hemdistcache(&(cache[j]), &(cache[k])); + size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen); if (size_waste > waste) { waste = size_waste; @@ -641,44 +679,21 @@ gtsvector_picksplit(PG_FUNCTION_ARGS) } /* form initial .. */ - if (cache[seed_1].allistrue) - { - datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - datum_l->flag = SIGNKEY | ALLISTRUE; - } - else - { - datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0)); - SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0)); - datum_l->flag = SIGNKEY; - memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC)); - } - if (cache[seed_2].allistrue) - { - datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); - datum_r->flag = SIGNKEY | ALLISTRUE; - } - else - { - datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0)); - SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0)); - datum_r->flag = SIGNKEY; - memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC)); - } - + datum_l = gtsvector_alloc(SIGNKEY | (cache[seed_1].allistrue ? ALLISTRUE : 0), + siglen, cache[seed_1].sign); + datum_r = gtsvector_alloc(SIGNKEY | (cache[seed_2].allistrue ? ALLISTRUE : 0), + siglen, cache[seed_2].sign); union_l = GETSIGN(datum_l); union_r = GETSIGN(datum_r); maxoff = OffsetNumberNext(maxoff); - fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff)); + fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff), siglen); /* sort before ... */ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) { costvector[j - 1].pos = j; - size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j])); - size_beta = hemdistcache(&(cache[seed_2]), &(cache[j])); + size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen); + size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen); costvector[j - 1].cost = Abs(size_alpha - size_beta); } qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); @@ -704,36 +719,40 @@ gtsvector_picksplit(PG_FUNCTION_ARGS) if (ISALLTRUE(datum_l) && cache[j].allistrue) size_alpha = 0; else - size_alpha = SIGLENBIT - sizebitvec((cache[j].allistrue) ? - GETSIGN(datum_l) : - GETSIGN(cache[j].sign)); + size_alpha = SIGLENBIT(siglen) - + sizebitvec((cache[j].allistrue) ? + GETSIGN(datum_l) : + GETSIGN(cache[j].sign), + siglen); } else - size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l)); + size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen); if (ISALLTRUE(datum_r) || cache[j].allistrue) { if (ISALLTRUE(datum_r) && cache[j].allistrue) size_beta = 0; else - size_beta = SIGLENBIT - sizebitvec((cache[j].allistrue) ? - GETSIGN(datum_r) : - GETSIGN(cache[j].sign)); + size_beta = SIGLENBIT(siglen) - + sizebitvec((cache[j].allistrue) ? + GETSIGN(datum_r) : + GETSIGN(cache[j].sign), + siglen); } else - size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r)); + size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen); if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) { if (ISALLTRUE(datum_l) || cache[j].allistrue) { if (!ISALLTRUE(datum_l)) - MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC)); + MemSet((void *) GETSIGN(datum_l), 0xff, siglen); } else { ptr = cache[j].sign; - LOOPBYTE + LOOPBYTE(siglen) union_l[i] |= ptr[i]; } *left++ = j; @@ -744,12 +763,12 @@ gtsvector_picksplit(PG_FUNCTION_ARGS) if (ISALLTRUE(datum_r) || cache[j].allistrue) { if (!ISALLTRUE(datum_r)) - MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC)); + MemSet((void *) GETSIGN(datum_r), 0xff, siglen); } else { ptr = cache[j].sign; - LOOPBYTE + LOOPBYTE(siglen) union_r[i] |= ptr[i]; } *right++ = j; @@ -776,3 +795,16 @@ gtsvector_consistent_oldsig(PG_FUNCTION_ARGS) { return gtsvector_consistent(fcinfo); } + +Datum +gtsvector_options(PG_FUNCTION_ARGS) +{ + local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); + + init_local_reloptions(relopts, sizeof(GistTsVectorOptions)); + add_local_int_reloption(relopts, "siglen", "signature length", + SIGLEN_DEFAULT, 1, SIGLEN_MAX, + offsetof(GistTsVectorOptions, siglen)); + + PG_RETURN_VOID(); +} |