aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2015-07-20 18:18:48 +0300
committerTeodor Sigaev <teodor@sigaev.ru>2015-07-20 18:18:48 +0300
commit97f3014647a5bd570032abd2b809d3233003f13f (patch)
tree126399bfdfdb85b03998a489325d4e484b52a1a2
parent1a5118008003b3c42f5cbb37980dabdb6a718e6f (diff)
downloadpostgresql-97f3014647a5bd570032abd2b809d3233003f13f.tar.gz
postgresql-97f3014647a5bd570032abd2b809d3233003f13f.zip
Add a triconsistent support function to the pg_trgm GIN opclass
so that indexed queries in which some keys are common and others are rare execute faster. Patch by Jeff Janes
-rw-r--r--contrib/pg_trgm/Makefile2
-rw-r--r--contrib/pg_trgm/pg_trgm--1.1--1.2.sql12
-rw-r--r--contrib/pg_trgm/pg_trgm--1.2.sql (renamed from contrib/pg_trgm/pg_trgm--1.1.sql)12
-rw-r--r--contrib/pg_trgm/pg_trgm.control2
-rw-r--r--contrib/pg_trgm/trgm_gin.c92
5 files changed, 117 insertions, 3 deletions
diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
index e081a1e5e9f..1e387536221 100644
--- a/contrib/pg_trgm/Makefile
+++ b/contrib/pg_trgm/Makefile
@@ -4,7 +4,7 @@ MODULE_big = pg_trgm
OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES)
EXTENSION = pg_trgm
-DATA = pg_trgm--1.1.sql pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql
+DATA = pg_trgm--1.2.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--unpackaged--1.0.sql
PGFILEDESC = "pg_trgm - trigram matching"
REGRESS = pg_trgm
diff --git a/contrib/pg_trgm/pg_trgm--1.1--1.2.sql b/contrib/pg_trgm/pg_trgm--1.1--1.2.sql
new file mode 100644
index 00000000000..c101f21061b
--- /dev/null
+++ b/contrib/pg_trgm/pg_trgm--1.1--1.2.sql
@@ -0,0 +1,12 @@
+/* contrib/pg_trgm/pg_trgm--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. \quit
+
+CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal)
+RETURNS "char"
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+ FUNCTION 6 (text, text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal);
diff --git a/contrib/pg_trgm/pg_trgm--1.1.sql b/contrib/pg_trgm/pg_trgm--1.2.sql
index 34b37e47872..03d46d07f98 100644
--- a/contrib/pg_trgm/pg_trgm--1.1.sql
+++ b/contrib/pg_trgm/pg_trgm--1.2.sql
@@ -1,4 +1,4 @@
-/* contrib/pg_trgm/pg_trgm--1.1.sql */
+/* contrib/pg_trgm/pg_trgm--1.2.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit
@@ -176,3 +176,13 @@ ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
OPERATOR 5 pg_catalog.~ (text, text),
OPERATOR 6 pg_catalog.~* (text, text);
+
+-- Add functions that are new in 9.6 (pg_trgm 1.2).
+
+CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal)
+RETURNS "char"
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+ FUNCTION 6 (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal);
diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control
index 2ac51e6890c..cbf5a186d7e 100644
--- a/contrib/pg_trgm/pg_trgm.control
+++ b/contrib/pg_trgm/pg_trgm.control
@@ -1,5 +1,5 @@
# pg_trgm extension
comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.1'
+default_version = '1.2'
module_pathname = '$libdir/pg_trgm'
relocatable = true
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index d524ceaa19e..6a0731d44ea 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -14,6 +14,7 @@ PG_FUNCTION_INFO_V1(gin_extract_trgm);
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
+PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
/*
* This function can only be called if a pre-9.1 version of the GIN operator
@@ -235,3 +236,94 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res);
}
+
+/*
+ * Ternary-logic counterpart of gin_trgm_consistent for the pg_trgm GIN
+ * opclass.
+ *
+ * Arguments actually read: 0 = check[] (one GinTernaryValue per key),
+ * 1 = strategy number, 3 = nkeys, 4 = extra_data. Argument 2 (the query
+ * text) is not used.
+ *
+ * The result is always GIN_FALSE or GIN_MAYBE, never GIN_TRUE (enforced by
+ * the Assert at the end), because every strategy handled here is inexact.
+ *
+ * In all cases, GIN_TRUE is at least as favorable to inclusion as
+ * GIN_MAYBE. If no better option is available, simply treat
+ * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
+ * consistent function.
+ */
+Datum
+gin_trgm_triconsistent(PG_FUNCTION_ARGS)
+{
+ GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* text *query = PG_GETARG_TEXT_P(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ GinTernaryValue res = GIN_MAYBE;
+ int32 i,
+ ntrue;
+ bool *boolcheck;
+
+ switch (strategy)
+ {
+ case SimilarityStrategyNumber:
+ /* Count the matches: every key that is not GIN_FALSE counts */
+ ntrue = 0;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] != GIN_FALSE)
+ ntrue++;
+ }
+ /*
+ * Compare the match ratio against trgm_limit. Each variant checks
+ * for a zero divisor first (nkeys == ntrue with DIVUNION, nkeys == 0
+ * otherwise) before dividing.
+ */
+#ifdef DIVUNION
+ res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
+#else
+ res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
+#endif
+ break;
+ case ILikeStrategyNumber:
+ /* Without IGNORECASE this strategy is rejected with an error */
+#ifndef IGNORECASE
+ elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
+#endif
+ /* FALL THRU */
+ case LikeStrategyNumber:
+ /*
+ * Check if all extracted trigrams are present: a single GIN_FALSE
+ * key rules the item out, otherwise the answer stays GIN_MAYBE.
+ */
+ res = GIN_MAYBE;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] == GIN_FALSE)
+ {
+ res = GIN_FALSE;
+ break;
+ }
+ }
+ break;
+ case RegExpICaseStrategyNumber:
+ /* Without IGNORECASE this strategy is rejected with an error */
+#ifndef IGNORECASE
+ elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+ /* FALL THRU */
+ case RegExpStrategyNumber:
+ if (nkeys < 1)
+ {
+ /* Regex processing gave no result: do full index scan */
+ res = GIN_MAYBE;
+ }
+ else
+ {
+ /*
+ * As trigramsMatchGraph implements a monotonic boolean function,
+ * promoting all GIN_MAYBE keys to GIN_TRUE will give a
+ * conservative result.
+ *
+ * boolcheck is a temporary bool copy of check[] built for that
+ * call; extra_data[0] is passed as the packed trigram graph.
+ * res keeps its initial GIN_MAYBE unless the graph does not match.
+ */
+ boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
+ for (i = 0; i < nkeys; i++)
+ boolcheck[i] = (check[i] != GIN_FALSE);
+ if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
+ boolcheck))
+ res = GIN_FALSE;
+ pfree(boolcheck);
+ }
+ break;
+ default:
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+ res = GIN_FALSE; /* keep compiler quiet */
+ break;
+ }
+
+ /* All cases served by this function are inexact */
+ Assert(res != GIN_TRUE);
+ PG_RETURN_GIN_TERNARY_VALUE(res);
+}