aboutsummaryrefslogtreecommitdiff
path: root/contrib/pg_trgm/trgm_gist.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/pg_trgm/trgm_gist.c')
-rw-r--r--contrib/pg_trgm/trgm_gist.c152
1 files changed, 128 insertions, 24 deletions
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 605d7ea3569..178f073755b 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -8,6 +8,25 @@
#include "access/skey.h"
+typedef struct
+{
+ /* most recent inputs to gtrgm_consistent */
+ StrategyNumber strategy;
+ text *query;
+ /* extracted trigrams for query */
+ TRGM *trigrams;
+ /* if a regex operator, the extracted graph */
+ TrgmPackedGraph *graph;
+
+ /*
+ * The "query" and "trigrams" are stored in the same palloc block as this
+ * cache struct, at MAXALIGN'ed offsets. The graph however isn't.
+ */
+} gtrgm_consistent_cache;
+
+#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
+
+
PG_FUNCTION_INFO_V1(gtrgm_in);
Datum gtrgm_in(PG_FUNCTION_ARGS);
@@ -38,8 +57,6 @@ Datum gtrgm_penalty(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtrgm_picksplit);
Datum gtrgm_picksplit(PG_FUNCTION_ARGS);
-#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
-
/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
@@ -191,24 +208,30 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
TRGM *qtrg;
bool res;
Size querysize = VARSIZE(query);
- char *cache = (char *) fcinfo->flinfo->fn_extra,
- *cachedQuery = cache + MAXALIGN(sizeof(StrategyNumber));
+ gtrgm_consistent_cache *cache;
/*
- * Store both the strategy number and extracted trigrams in cache, because
- * trigram extraction is relatively CPU-expensive. We must include
- * strategy number because trigram extraction depends on strategy.
+ * We keep the extracted trigrams in cache, because trigram extraction is
+ * relatively CPU-expensive. When trying to reuse a cached value, check
+ * strategy number not just query itself, because trigram extraction
+ * depends on strategy.
*
- * The cached structure contains the strategy number, then the input query
- * (starting at a MAXALIGN boundary), then the TRGM value (also starting
- * at a MAXALIGN boundary).
+ * The cached structure is a single palloc chunk containing the
+ * gtrgm_consistent_cache header, then the input query (starting at a
+ * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN
+ * boundary). However we don't try to include the regex graph (if any) in
+ * that struct. (XXX currently, this approach can leak regex graphs
+ * across index rescans. Not clear if that's worth fixing.)
*/
+ cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
if (cache == NULL ||
- strategy != *((StrategyNumber *) cache) ||
- VARSIZE(cachedQuery) != querysize ||
- memcmp(cachedQuery, query, querysize) != 0)
+ cache->strategy != strategy ||
+ VARSIZE(cache->query) != querysize ||
+ memcmp((char *) cache->query, (char *) query, querysize) != 0)
{
- char *newcache;
+ gtrgm_consistent_cache *newcache;
+ TrgmPackedGraph *graph = NULL;
+ Size qtrgsize;
switch (strategy)
{
@@ -225,28 +248,58 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
qtrg = generate_wildcard_trgm(VARDATA(query),
querysize - VARHDRSZ);
break;
+ case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+ elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+ /* FALL THRU */
+ case RegExpStrategyNumber:
+ qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
+ &graph, fcinfo->flinfo->fn_mcxt);
+ /* just in case an empty array is returned ... */
+ if (qtrg && ARRNELEM(qtrg) <= 0)
+ {
+ pfree(qtrg);
+ qtrg = NULL;
+ }
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
qtrg = NULL; /* keep compiler quiet */
break;
}
- newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
- MAXALIGN(sizeof(StrategyNumber)) +
- MAXALIGN(querysize) +
- VARSIZE(qtrg));
- cachedQuery = newcache + MAXALIGN(sizeof(StrategyNumber));
+ qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
- *((StrategyNumber *) newcache) = strategy;
- memcpy(cachedQuery, query, querysize);
- memcpy(cachedQuery + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
+ newcache = (gtrgm_consistent_cache *)
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ MAXALIGN(sizeof(gtrgm_consistent_cache)) +
+ MAXALIGN(querysize) +
+ qtrgsize);
+
+ newcache->strategy = strategy;
+ newcache->query = (text *)
+ ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
+ memcpy((char *) newcache->query, (char *) query, querysize);
+ if (qtrg)
+ {
+ newcache->trigrams = (TRGM *)
+ ((char *) newcache->query + MAXALIGN(querysize));
+ memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
+ /* release qtrg in case it was made in fn_mcxt */
+ pfree(qtrg);
+ }
+ else
+ newcache->trigrams = NULL;
+ newcache->graph = graph;
if (cache)
pfree(cache);
- fcinfo->flinfo->fn_extra = newcache;
+ fcinfo->flinfo->fn_extra = (void *) newcache;
+ cache = newcache;
}
- qtrg = (TRGM *) (cachedQuery + MAXALIGN(querysize));
+ qtrg = cache->trigrams;
switch (strategy)
{
@@ -317,6 +370,57 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
}
}
break;
+ case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+ elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+ /* FALL THRU */
+ case RegExpStrategyNumber:
+ /* Regexp search is inexact */
+ *recheck = true;
+
+ /* Check regex match as much as we can with available info */
+ if (qtrg)
+ {
+ if (GIST_LEAF(entry))
+ { /* all leafs contains orig trgm */
+ bool *check;
+
+ check = trgm_presence_map(qtrg, key);
+ res = trigramsMatchGraph(cache->graph, check);
+ pfree(check);
+ }
+ else if (ISALLTRUE(key))
+ { /* non-leaf contains signature */
+ res = true;
+ }
+ else
+ { /* non-leaf contains signature */
+ int32 k,
+ tmp = 0,
+ len = ARRNELEM(qtrg);
+ trgm *ptr = GETARR(qtrg);
+ BITVECP sign = GETSIGN(key);
+
+ /* descend only if at least one trigram is present */
+ res = false;
+ for (k = 0; k < len; k++)
+ {
+ CPTRGM(((char *) &tmp), ptr + k);
+ if (GETBIT(sign, HASHVAL(tmp)))
+ {
+ res = true;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* trigram-free query must be rechecked everywhere */
+ res = true;
+ }
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
res = false; /* keep compiler quiet */