Add SP-GiST (space-partitioned GiST) index access method.

SP-GiST is comparable to GiST in flexibility, but supports non-balanced partitioned search structures rather than balanced trees. As described at PGCon 2011, this new indexing structure can beat GiST in both index build time and query speed for search problems that it is well matched to. There are a number of areas that could still use improvement, but at this point the code seems committable. Teodor Sigaev and Oleg Bartunov, with considerable revisions by Tom Lane
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-12-17 16:41:16 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-12-17 16:42:30 -0500
commit: 8daeb5ddd698f661eb118f8e874e7c68cfd6ae09 (patch)
tree: 765599b73e45a6ca5529398489f31a534ab1924e /src/backend/access/spgist/spgtextproc.c
parent: 19fc0fe3ae7861a8b0d3ab8b67bd01fde33bf2da (diff)
download: postgresql-8daeb5ddd698f661eb118f8e874e7c68cfd6ae09.tar.gz
postgresql-8daeb5ddd698f661eb118f8e874e7c68cfd6ae09.zip
1 files changed, 594 insertions, 0 deletions
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
new file mode 100644
index 00000000000..b6037978425
--- /dev/null
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -0,0 +1,594 @@
+/*-------------------------------------------------------------------------
+ *
+ * spgtextproc.c
+ *	  implementation of compressed-suffix tree over text
+ *
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *			src/backend/access/spgist/spgtextproc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/spgist.h"
+#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/pg_locale.h"
+
+
+/*
+ * In the worst case, a inner tuple in a text suffix tree could have as many
+ * as 256 nodes (one for each possible byte value).  Each node can take 16
+ * bytes on MAXALIGN=8 machines.  The inner tuple must fit on an index page
+ * of size BLCKSZ.  Rather than assuming we know the exact amount of overhead
+ * imposed by page headers, tuple headers, etc, we leave 100 bytes for that
+ * (the actual overhead should be no more than 56 bytes at this writing, so
+ * there is slop in this number).  The upshot is that the maximum safe prefix
+ * length is this:
+ */
+#define SPGIST_MAX_PREFIX_LENGTH	(BLCKSZ - 256 * 16 - 100)
+
+/* Struct for sorting values in picksplit */
+typedef struct spgNodePtr
+{
+	Datum		d;
+	int			i;
+	uint8		c;
+} spgNodePtr;
+
+
+Datum
+spg_text_config(PG_FUNCTION_ARGS)
+{
+	/* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
+	spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
+
+	cfg->prefixType = TEXTOID;
+	cfg->labelType = CHAROID;
+	cfg->longValuesOK = true;	/* suffixing will shorten long values */
+	PG_RETURN_VOID();
+}
+
+/*
+ * Form a text datum from the given not-necessarily-null-terminated string,
+ * using short varlena header format if possible
+ */
+static Datum
+formTextDatum(const char *data, int datalen)
+{
+	char	   *p;
+
+	p = (char *) palloc(datalen + VARHDRSZ);
+
+	if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
+	{
+		SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
+		if (datalen)
+			memcpy(p + VARHDRSZ_SHORT, data, datalen);
+	}
+	else
+	{
+		SET_VARSIZE(p, datalen + VARHDRSZ);
+		memcpy(p + VARHDRSZ, data, datalen);
+	}
+
+	return PointerGetDatum(p);
+}
+
+/*
+ * Find the length of the common prefix of a and b
+ */
+static int
+commonPrefix(const char *a, const char *b, int lena, int lenb)
+{
+	int			i = 0;
+
+	while (i < lena && i < lenb && *a == *b)
+	{
+		a++;
+		b++;
+		i++;
+	}
+
+	return i;
+}
+
+/*
+ * Binary search an array of uint8 datums for a match to c
+ *
+ * On success, *i gets the match location; on failure, it gets where to insert
+ */
+static bool
+searchChar(Datum *nodeLabels, int nNodes, uint8 c, int *i)
+{
+	int			StopLow = 0,
+				StopHigh = nNodes;
+
+	while (StopLow < StopHigh)
+	{
+		int			StopMiddle = (StopLow + StopHigh) >> 1;
+		uint8		middle = DatumGetUInt8(nodeLabels[StopMiddle]);
+
+		if (c < middle)
+			StopHigh = StopMiddle;
+		else if (c > middle)
+			StopLow = StopMiddle + 1;
+		else
+		{
+			*i = StopMiddle;
+			return true;
+		}
+	}
+
+	*i = StopHigh;
+	return false;
+}
+
+Datum
+spg_text_choose(PG_FUNCTION_ARGS)
+{
+	spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
+	spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
+	text	   *inText = DatumGetTextPP(in->datum);
+	char	   *inStr = VARDATA_ANY(inText);
+	int			inSize = VARSIZE_ANY_EXHDR(inText);
+	uint8		nodeChar = '\0';
+	int			i = 0;
+	int			commonLen = 0;
+
+	/* Check for prefix match, set nodeChar to first byte after prefix */
+	if (in->hasPrefix)
+	{
+		text	   *prefixText = DatumGetTextPP(in->prefixDatum);
+		char	   *prefixStr = VARDATA_ANY(prefixText);
+		int			prefixSize = VARSIZE_ANY_EXHDR(prefixText);
+
+		commonLen = commonPrefix(inStr + in->level,
+								 prefixStr,
+								 inSize - in->level,
+								 prefixSize);
+
+		if (commonLen == prefixSize)
+		{
+			if (inSize - in->level > commonLen)
+				nodeChar = *(uint8 *) (inStr + in->level + commonLen);
+			else
+				nodeChar = '\0';
+		}
+		else
+		{
+			/* Must split tuple because incoming value doesn't match prefix */
+			out->resultType = spgSplitTuple;
+
+			if (commonLen == 0)
+			{
+				out->result.splitTuple.prefixHasPrefix = false;
+			}
+			else
+			{
+				out->result.splitTuple.prefixHasPrefix = true;
+				out->result.splitTuple.prefixPrefixDatum =
+					formTextDatum(prefixStr, commonLen);
+			}
+			out->result.splitTuple.nodeLabel =
+				UInt8GetDatum(*(prefixStr + commonLen));
+
+			if (prefixSize - commonLen == 1)
+			{
+				out->result.splitTuple.postfixHasPrefix = false;
+			}
+			else
+			{
+				out->result.splitTuple.postfixHasPrefix = true;
+				out->result.splitTuple.postfixPrefixDatum =
+					formTextDatum(prefixStr + commonLen + 1,
+								  prefixSize - commonLen - 1);
+			}
+
+			PG_RETURN_VOID();
+		}
+	}
+	else if (inSize > in->level)
+	{
+		nodeChar = *(uint8 *) (inStr + in->level);
+	}
+	else
+	{
+		nodeChar = '\0';
+	}
+
+	/* Look up nodeChar in the node label array */
+	if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
+	{
+		/*
+		 * Descend to existing node.  (If in->allTheSame, the core code will
+		 * ignore our nodeN specification here, but that's OK.  We still
+		 * have to provide the correct levelAdd and restDatum values, and
+		 * those are the same regardless of which node gets chosen by core.)
+		 */
+		out->resultType = spgMatchNode;
+		out->result.matchNode.nodeN = i;
+		out->result.matchNode.levelAdd = commonLen + 1;
+		if (inSize - in->level - commonLen - 1 > 0)
+			out->result.matchNode.restDatum =
+				formTextDatum(inStr + in->level + commonLen + 1,
+							  inSize - in->level - commonLen - 1);
+		else
+			out->result.matchNode.restDatum =
+				formTextDatum(NULL, 0);
+	}
+	else if (in->allTheSame)
+	{
+		/*
+		 * Can't use AddNode action, so split the tuple.  The upper tuple
+		 * has the same prefix as before and uses an empty node label for
+		 * the lower tuple.  The lower tuple has no prefix and the same
+		 * node labels as the original tuple.
+		 */
+		out->resultType = spgSplitTuple;
+		out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
+		out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
+		out->result.splitTuple.nodeLabel = UInt8GetDatum('\0');
+		out->result.splitTuple.postfixHasPrefix = false;
+	}
+	else
+	{
+		/* Add a node for the not-previously-seen nodeChar value */
+		out->resultType = spgAddNode;
+		out->result.addNode.nodeLabel = UInt8GetDatum(nodeChar);
+		out->result.addNode.nodeN = i;
+	}
+
+	PG_RETURN_VOID();
+}
+
+/* qsort comparator to sort spgNodePtr structs by "c" */
+static int
+cmpNodePtr(const void *a, const void *b)
+{
+	const spgNodePtr *aa = (const spgNodePtr *) a;
+	const spgNodePtr *bb = (const spgNodePtr *) b;
+
+	if (aa->c == bb->c)
+		return 0;
+	else if (aa->c > bb->c)
+		return 1;
+	else
+		return -1;
+}
+
+Datum
+spg_text_picksplit(PG_FUNCTION_ARGS)
+{
+	spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
+	spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
+	text	   *text0 = DatumGetTextPP(in->datums[0]);
+	int			i,
+				commonLen;
+	spgNodePtr *nodes;
+
+	/* Identify longest common prefix, if any */
+	commonLen = VARSIZE_ANY_EXHDR(text0);
+	for (i = 1; i < in->nTuples && commonLen > 0; i++)
+	{
+		text	   *texti = DatumGetTextPP(in->datums[i]);
+		int			tmp = commonPrefix(VARDATA_ANY(text0),
+									   VARDATA_ANY(texti),
+									   VARSIZE_ANY_EXHDR(text0),
+									   VARSIZE_ANY_EXHDR(texti));
+
+		if (tmp < commonLen)
+			commonLen = tmp;
+	}
+
+	/*
+	 * Limit the prefix length, if necessary, to ensure that the resulting
+	 * inner tuple will fit on a page.
+	 */
+	commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
+
+	/* Set node prefix to be that string, if it's not empty */
+	if (commonLen == 0)
+	{
+		out->hasPrefix = false;
+	}
+	else
+	{
+		out->hasPrefix = true;
+		out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
+	}
+
+	/* Extract the node label (first non-common byte) from each value */
+	nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);
+
+	for (i = 0; i < in->nTuples; i++)
+	{
+		text	   *texti = DatumGetTextPP(in->datums[i]);
+
+		if (commonLen < VARSIZE_ANY_EXHDR(texti))
+			nodes[i].c = *(uint8 *) (VARDATA_ANY(texti) + commonLen);
+		else
+			nodes[i].c = '\0';			/* use \0 if string is all common */
+		nodes[i].i = i;
+		nodes[i].d = in->datums[i];
+	}
+
+	/*
+	 * Sort by label bytes so that we can group the values into nodes.  This
+	 * also ensures that the nodes are ordered by label value, allowing the
+	 * use of binary search in searchChar.
+	 */
+	qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
+
+	/* And emit results */
+	out->nNodes = 0;
+	out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
+	out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
+	out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
+
+	for (i = 0; i < in->nTuples; i++)
+	{
+		text	   *texti = DatumGetTextPP(nodes[i].d);
+		Datum		leafD;
+
+		if (i == 0 || nodes[i].c != nodes[i - 1].c)
+		{
+			out->nodeLabels[out->nNodes] = UInt8GetDatum(nodes[i].c);
+			out->nNodes++;
+		}
+
+		if (commonLen < VARSIZE_ANY_EXHDR(texti))
+			leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
+								  VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
+		else
+			leafD = formTextDatum(NULL, 0);
+
+		out->leafTupleDatums[nodes[i].i] = leafD;
+		out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+spg_text_inner_consistent(PG_FUNCTION_ARGS)
+{
+	spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
+	spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
+	StrategyNumber strategy = in->strategy;
+	text	   *inText;
+	int			inSize;
+	int			i;
+	text	   *reconstrText = NULL;
+	int			maxReconstrLen = 0;
+	text	   *prefixText = NULL;
+	int			prefixSize = 0;
+
+	/*
+	 * If it's a collation-aware operator, but the collation is C, we can
+	 * treat it as non-collation-aware.
+	 */
+	if (strategy > 10 &&
+		lc_collate_is_c(PG_GET_COLLATION()))
+		strategy -= 10;
+
+	inText = DatumGetTextPP(in->query);
+	inSize = VARSIZE_ANY_EXHDR(inText);
+
+	/*
+	 * Reconstruct values represented at this tuple, including parent data,
+	 * prefix of this tuple if any, and the node label if any.  in->level
+	 * should be the length of the previously reconstructed value, and the
+	 * number of bytes added here is prefixSize or prefixSize + 1.
+	 *
+	 * Note: we assume that in->reconstructedValue isn't toasted and doesn't
+	 * have a short varlena header.  This is okay because it must have been
+	 * created by a previous invocation of this routine, and we always emit
+	 * long-format reconstructed values.
+	 */
+	Assert(in->level == 0 ? DatumGetPointer(in->reconstructedValue) == NULL :
+		   VARSIZE_ANY_EXHDR(DatumGetPointer(in->reconstructedValue)) == in->level);
+
+	maxReconstrLen = in->level + 1;
+	if (in->hasPrefix)
+	{
+		prefixText = DatumGetTextPP(in->prefixDatum);
+		prefixSize = VARSIZE_ANY_EXHDR(prefixText);
+		maxReconstrLen += prefixSize;
+	}
+
+	reconstrText = palloc(VARHDRSZ + maxReconstrLen);
+	SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
+
+	if (in->level)
+		memcpy(VARDATA(reconstrText),
+			   VARDATA(DatumGetPointer(in->reconstructedValue)),
+			   in->level);
+	if (prefixSize)
+		memcpy(((char *) VARDATA(reconstrText)) + in->level,
+			   VARDATA_ANY(prefixText),
+			   prefixSize);
+	/* last byte of reconstrText will be filled in below */
+
+	/*
+	 * Scan the child nodes.  For each one, complete the reconstructed value
+	 * and see if it's consistent with the query.  If so, emit an entry into
+	 * the output arrays.
+	 */
+	out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
+	out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
+	out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
+	out->nNodes = 0;
+
+	for (i = 0; i < in->nNodes; i++)
+	{
+		uint8		nodeChar = DatumGetUInt8(in->nodeLabels[i]);
+		int			thisLen;
+		int			r;
+		bool		res = false;
+
+		/* If nodeChar is zero, don't include it in data */
+		if (nodeChar == '\0')
+			thisLen = maxReconstrLen - 1;
+		else
+		{
+			((char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
+			thisLen = maxReconstrLen;
+		}
+
+		r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
+				   Min(inSize, thisLen));
+
+		switch (strategy)
+		{
+			case BTLessStrategyNumber:
+			case BTLessEqualStrategyNumber:
+				if (r <= 0)
+					res = true;
+				break;
+			case BTEqualStrategyNumber:
+				if (r == 0 && inSize >= thisLen)
+					res = true;
+				break;
+			case BTGreaterEqualStrategyNumber:
+			case BTGreaterStrategyNumber:
+				if (r >= 0)
+					res = true;
+				break;
+			case BTLessStrategyNumber + 10:
+			case BTLessEqualStrategyNumber + 10:
+			case BTGreaterEqualStrategyNumber + 10:
+			case BTGreaterStrategyNumber + 10:
+				/*
+				 * with non-C collation we need to traverse whole tree :-(
+				 */
+				res = true;
+				break;
+			default:
+				elog(ERROR, "unrecognized strategy number: %d",
+					 in->strategy);
+				break;
+		}
+
+		if (res)
+		{
+			out->nodeNumbers[out->nNodes] = i;
+			out->levelAdds[out->nNodes] = thisLen - in->level;
+			SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
+			out->reconstructedValues[out->nNodes] =
+				datumCopy(PointerGetDatum(reconstrText), false, -1);
+			out->nNodes++;
+		}
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+spg_text_leaf_consistent(PG_FUNCTION_ARGS)
+{
+	spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
+	spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
+	StrategyNumber strategy = in->strategy;
+	text	   *query = DatumGetTextPP(in->query);
+	int			level = in->level;
+	text	   *leafValue,
+			   *reconstrValue = NULL;
+	char	   *fullValue;
+	int			fullLen;
+	int			queryLen;
+	int			r;
+	bool		res;
+
+	/* all tests are exact */
+	out->recheck = false;
+
+	leafValue = DatumGetTextPP(in->leafDatum);
+
+	if (DatumGetPointer(in->reconstructedValue))
+		reconstrValue = DatumGetTextP(in->reconstructedValue);
+
+	Assert(level == 0 ? reconstrValue == NULL :
+		   VARSIZE_ANY_EXHDR(reconstrValue) == level);
+
+	fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
+
+	queryLen = VARSIZE_ANY_EXHDR(query);
+
+	/* For equality, we needn't reconstruct fullValue if not same length */
+	if (strategy == BTEqualStrategyNumber && queryLen != fullLen)
+		PG_RETURN_BOOL(false);
+
+	/* Else, reconstruct the full string represented by this leaf tuple */
+	if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
+	{
+		fullValue = VARDATA(reconstrValue);
+	}
+	else
+	{
+		fullValue = palloc(fullLen);
+		if (level)
+			memcpy(fullValue, VARDATA(reconstrValue), level);
+		if (VARSIZE_ANY_EXHDR(leafValue) > 0)
+			memcpy(fullValue + level, VARDATA_ANY(leafValue),
+				   VARSIZE_ANY_EXHDR(leafValue));
+	}
+
+	/* Run the appropriate type of comparison */
+	if (strategy > 10)
+	{
+		/* Collation-aware comparison */
+		strategy -= 10;
+
+		/* If asserts are enabled, verify encoding of reconstructed string */
+		Assert(pg_verifymbstr(fullValue, fullLen, false));
+
+		r = varstr_cmp(fullValue, Min(queryLen, fullLen),
+					   VARDATA_ANY(query), Min(queryLen, fullLen),
+					   PG_GET_COLLATION());
+	}
+	else
+	{
+		/* Non-collation-aware comparison */
+		r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
+	}
+
+	if (r == 0)
+	{
+		if (queryLen > fullLen)
+			r = -1;
+		else if (queryLen < fullLen)
+			r = 1;
+	}
+
+	switch (strategy)
+	{
+		case BTLessStrategyNumber:
+			res = (r < 0);
+			break;
+		case BTLessEqualStrategyNumber:
+			res = (r <= 0);
+			break;
+		case BTEqualStrategyNumber:
+			res = (r == 0);
+			break;
+		case BTGreaterEqualStrategyNumber:
+			res = (r >= 0);
+			break;
+		case BTGreaterStrategyNumber:
+			res = (r > 0);
+			break;
+		default:
+			elog(ERROR, "unrecognized strategy number: %d", in->strategy);
+			res = false;
+			break;
+	}
+
+	PG_RETURN_BOOL(res);
+}
author	Tom Lane <tgl@sss.pgh.pa.us>	2011-12-17 16:41:16 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2011-12-17 16:42:30 -0500
commit	8daeb5ddd698f661eb118f8e874e7c68cfd6ae09 (patch)
tree	765599b73e45a6ca5529398489f31a534ab1924e /src/backend/access/spgist/spgtextproc.c
parent	19fc0fe3ae7861a8b0d3ab8b67bd01fde33bf2da (diff)
download	postgresql-8daeb5ddd698f661eb118f8e874e7c68cfd6ae09.tar.gz postgresql-8daeb5ddd698f661eb118f8e874e7c68cfd6ae09.zip