diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2015-12-25 13:05:13 +0300 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2015-12-25 13:05:13 +0300 |
commit | 25bfa7efd037a3c44d6a2989d18f55758090e8a9 (patch) | |
tree | 6f670a31c83c3f77a9d3bf28dbcec31a58211cb9 | |
parent | a9246fbf665327870370d1088bfc9efdfd2719ec (diff) | |
download | postgresql-25bfa7efd037a3c44d6a2989d18f55758090e8a9.tar.gz postgresql-25bfa7efd037a3c44d6a2989d18f55758090e8a9.zip |
Improve the gin index scan performance in pg_trgm.
Previous coding assumes too pessimistic upper bound of similarity
in consistent method of GIN.
Author: Fornaroli Christophe with comments by me.
-rw-r--r-- | contrib/pg_trgm/trgm_gin.c | 28 |
1 files changed, 20 insertions, 8 deletions
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c index 6a0731d44ea..baa4cc70a23 100644 --- a/contrib/pg_trgm/trgm_gin.c +++ b/contrib/pg_trgm/trgm_gin.c @@ -190,11 +190,23 @@ gin_trgm_consistent(PG_FUNCTION_ARGS) if (check[i]) ntrue++; } -#ifdef DIVUNION - res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false); -#else + + /*-------------------- + * If DIVUNION is defined then similarity formula is: + * c / (len1 + len2 - c) + * where c is number of common trigrams and it stands as ntrue in + * this code. Here we don't know value of len2 but we can assume + * that c (ntrue) is a lower bound of len2, so upper bound of + * similarity is: + * c / (len1 + c - c) => c / len1 + * If DIVUNION is not defined then similarity formula is: + * c / max(len1, len2) + * And again, c (ntrue) is a lower bound of len2, but c <= len1 + * just by definition and, consequently, upper bound of + * similarity is just c / len1. + * So, independly on DIVUNION the upper bound formula is the same. + */ res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false); -#endif break; case ILikeStrategyNumber: #ifndef IGNORECASE @@ -267,11 +279,11 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS) if (check[i] != GIN_FALSE) ntrue++; } -#ifdef DIVUNION - res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); -#else + + /* + * See comment in gin_trgm_consistent() about * upper bound formula + */ res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); -#endif break; case ILikeStrategyNumber: #ifndef IGNORECASE |