diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2010-12-04 00:16:21 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2010-12-04 00:16:21 -0500 |
commit | b525bf771e31a2254f28bf25c6ed7987d64c8afb (patch) | |
tree | 65583461edd171150f868f7f769635f43976d807 /contrib/pg_trgm/trgm_gist.c | |
parent | b576757d7ee064ada5351c2e6a36c2f7234aa1d4 (diff) | |
download | postgresql-b525bf771e31a2254f28bf25c6ed7987d64c8afb.tar.gz postgresql-b525bf771e31a2254f28bf25c6ed7987d64c8afb.zip |
Add KNNGIST support to contrib/pg_trgm.
Teodor Sigaev, with some revision by Tom
Diffstat (limited to 'contrib/pg_trgm/trgm_gist.c')
-rw-r--r-- | contrib/pg_trgm/trgm_gist.c | 140 |
1 files changed, 110 insertions, 30 deletions
```diff
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 567b2f878ff..d9f3d40c179 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -1,15 +1,19 @@
 /*
  * contrib/pg_trgm/trgm_gist.c
  */
+#include "postgres.h"
+
 #include "trgm.h"
 
 #include "access/gist.h"
 #include "access/itup.h"
+#include "access/skey.h"
 #include "access/tuptoaster.h"
 #include "storage/bufpage.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 
+
 PG_FUNCTION_INFO_V1(gtrgm_in);
 Datum gtrgm_in(PG_FUNCTION_ARGS);
 
@@ -25,6 +29,9 @@ Datum gtrgm_decompress(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(gtrgm_consistent);
 Datum gtrgm_consistent(PG_FUNCTION_ARGS);
 
+PG_FUNCTION_INFO_V1(gtrgm_distance);
+Datum gtrgm_distance(PG_FUNCTION_ARGS);
+
 PG_FUNCTION_INFO_V1(gtrgm_union);
 Datum gtrgm_union(PG_FUNCTION_ARGS);
 
@@ -159,18 +166,35 @@ gtrgm_decompress(PG_FUNCTION_ARGS)
 	}
 }
 
+static int4
+cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
+{
+	int4 count = 0;
+	int4 k,
+		len = ARRNELEM(qtrg);
+	trgm *ptr = GETARR(qtrg);
+	int4 tmp = 0;
+
+	for (k = 0; k < len; k++)
+	{
+		CPTRGM(((char *) &tmp), ptr + k);
+		count += GETBIT(sign, HASHVAL(tmp));
+	}
+
+	return count;
+}
+
 Datum
 gtrgm_consistent(PG_FUNCTION_ARGS)
 {
 	GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
 	text *query = PG_GETARG_TEXT_P(1);
-
-	/* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */
+	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
 	/* Oid subtype = PG_GETARG_OID(3); */
 	bool *recheck = (bool *) PG_GETARG_POINTER(4);
 	TRGM *key = (TRGM *) DatumGetPointer(entry->key);
 	TRGM *qtrg;
-	bool res = false;
+	bool res;
 	char *cache = (char *) fcinfo->flinfo->fn_extra;
 
 	/* All cases served by this function are exact */
@@ -193,39 +217,95 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
 
 	qtrg = (TRGM *) (cache + MAXALIGN(VARSIZE(query)));
 
-	if (GIST_LEAF(entry))
-	{ /* all leafs contains orig trgm */
-		float4 tmpsml = cnt_sml(key, qtrg);
+	switch (strategy)
+	{
+		case SimilarityStrategyNumber:
+			if (GIST_LEAF(entry))
+			{ /* all leafs contains orig trgm */
+				float4 tmpsml = cnt_sml(key, qtrg);
 
-		/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
-		res = (*(int *) &tmpsml == *(int *) &trgm_limit || tmpsml > trgm_limit) ? true : false;
+				/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
+				res = (*(int *) &tmpsml == *(int *) &trgm_limit || tmpsml > trgm_limit) ? true : false;
+			}
+			else if (ISALLTRUE(key))
+			{ /* non-leaf contains signature */
+				res = true;
+			}
+			else
+			{ /* non-leaf contains signature */
+				int4 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
+				int4 len = ARRNELEM(qtrg);
+
+				if (len == 0)
+					res = false;
+				else
+					res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false;
+			}
+			break;
+		default:
+			elog(ERROR, "unrecognized strategy number: %d", strategy);
+			res = false; /* keep compiler quiet */
+			break;
 	}
-	else if (ISALLTRUE(key))
-	{ /* non-leaf contains signature */
-		res = true;
+
+	PG_RETURN_BOOL(res);
+}
+
+Datum
+gtrgm_distance(PG_FUNCTION_ARGS)
+{
+	GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+	text *query = PG_GETARG_TEXT_P(1);
+	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+	/* Oid subtype = PG_GETARG_OID(3); */
+	TRGM *key = (TRGM *) DatumGetPointer(entry->key);
+	TRGM *qtrg;
+	float8 res;
+	char *cache = (char *) fcinfo->flinfo->fn_extra;
+
+	if (cache == NULL || VARSIZE(cache) != VARSIZE(query) || memcmp(cache, query, VARSIZE(query)) != 0)
+	{
+		qtrg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
+
+		if (cache)
+			pfree(cache);
+
+		fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+								MAXALIGN(VARSIZE(query)) + VARSIZE(qtrg));
+		cache = (char *) fcinfo->flinfo->fn_extra;
+
+		memcpy(cache, query, VARSIZE(query));
+		memcpy(cache + MAXALIGN(VARSIZE(query)), qtrg, VARSIZE(qtrg));
 	}
-	else
-	{ /* non-leaf contains signature */
-		int4 count = 0;
-		int4 k,
-			len = ARRNELEM(qtrg);
-		trgm *ptr = GETARR(qtrg);
-		BITVECP sign = GETSIGN(key);
-		int4 tmp = 0;
 
-		for (k = 0; k < len; k++)
-		{
-			CPTRGM(((char *) &tmp), ptr + k);
-			count += GETBIT(sign, HASHVAL(tmp));
-		}
-#ifdef DIVUNION
-		res = (len == count) ? true : ((((((float4) count) / ((float4) (len - count)))) >= trgm_limit) ? true : false);
-#else
-		res = (len == 0) ? false : ((((((float4) count) / ((float4) len))) >= trgm_limit) ? true : false);
-#endif
+	qtrg = (TRGM *) (cache + MAXALIGN(VARSIZE(query)));
+
+	switch (strategy)
+	{
+		case DistanceStrategyNumber:
+			if (GIST_LEAF(entry))
+			{ /* all leafs contains orig trgm */
+				res = 1.0 - cnt_sml(key, qtrg);
+			}
+			else if (ISALLTRUE(key))
+			{ /* all leafs contains orig trgm */
+				res = 0.0;
+			}
+			else
+			{ /* non-leaf contains signature */
+				int4 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
+				int4 len = ARRNELEM(qtrg);
+
+				res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
+			}
+			break;
+		default:
+			elog(ERROR, "unrecognized strategy number: %d", strategy);
+			res = 0; /* keep compiler quiet */
+			break;
 	}
 
-	PG_RETURN_BOOL(res);
+	PG_RETURN_FLOAT8(res);
 }
 
 static int4
```