Diffstat (limited to 'src')
35 files changed, 9503 insertions, 49 deletions
diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile index a4c4ca7da94..0366d59624e 100644 --- a/src/backend/access/Makefile +++ b/src/backend/access/Makefile @@ -8,6 +8,6 @@ subdir = src/backend/access top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = common gist hash heap index nbtree transam gin +SUBDIRS = common gist hash heap index nbtree transam gin spgist include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 240e178b3b4..100172fa4ac 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -19,6 +19,7 @@ #include "access/hash.h" #include "access/nbtree.h" #include "access/reloptions.h" +#include "access/spgist.h" #include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/tablespace.h" @@ -106,6 +107,14 @@ static relopt_int intRelOpts[] = }, { { + "fillfactor", + "Packs spgist index pages only to this percentage", + RELOPT_KIND_SPGIST + }, + SPGIST_DEFAULT_FILLFACTOR, SPGIST_MIN_FILLFACTOR, 100 + }, + { + { "autovacuum_vacuum_threshold", "Minimum number of tuple updates or deletes prior to vacuum", RELOPT_KIND_HEAP | RELOPT_KIND_TOAST diff --git a/src/backend/access/spgist/Makefile b/src/backend/access/spgist/Makefile new file mode 100644 index 00000000000..918da1fccaf --- /dev/null +++ b/src/backend/access/spgist/Makefile @@ -0,0 +1,19 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for access/spgist +# +# IDENTIFICATION +# src/backend/access/spgist/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/access/spgist +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = spgutils.o spginsert.o spgscan.o spgvacuum.o \ + spgdoinsert.o spgxlog.o \ + spgtextproc.o spgquadtreeproc.o spgkdtreeproc.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/spgist/README b/src/backend/access/spgist/README new file mode 100644 index 00000000000..4ff0e357cb4 --- /dev/null +++ b/src/backend/access/spgist/README @@ -0,0 +1,316 @@ +src/backend/access/spgist/README + +SP-GiST is an abbreviation of space-partitioned GiST. It provides a +generalized infrastructure for implementing space-partitioned data +structures, such as quadtrees, k-d trees, and suffix trees (tries). When +implemented in main memory, these structures are usually designed as a set of +dynamically-allocated nodes linked by pointers. This is not suitable for +direct storing on disk, since the chains of pointers can be rather long and +require too many disk accesses. In contrast, disk based data structures +should have a high fanout to minimize I/O. The challenge is to map tree +nodes to disk pages in such a way that the search algorithm accesses only a +few disk pages, even if it traverses many nodes. + +COMMON STRUCTURE DESCRIPTION + +Logically, an SP-GiST tree is a set of tuples, each of which can be either +an inner or leaf tuple. Each inner tuple contains "nodes", which are +(label,pointer) pairs, where the pointer (ItemPointerData) is a pointer to +another inner tuple or to the head of a list of leaf tuples. Inner tuples +can have different numbers of nodes (children). Branches can be of different +depth (actually, there is no control or code to support balancing), which +means that the tree is non-balanced. 
However, leaf and inner tuples cannot +be intermixed at the same level: a downlink from a node of an inner tuple +leads either to one inner tuple, or to a list of leaf tuples. + +The SP-GiST core requires that inner and leaf tuples fit on a single index +page, and even more stringently that the list of leaf tuples reached from a +single inner-tuple node all be stored on the same index page. (Restricting +such lists to not cross pages reduces seeks, and allows the list links to be +stored as simple 2-byte OffsetNumbers.) SP-GiST index opclasses should +therefore ensure that not too many nodes can be needed in one inner tuple, +and that inner-tuple prefixes and leaf-node datum values not be too large. + +Inner and leaf tuples are stored separately: the former are stored only on +"inner" pages, the latter only on "leaf" pages. Also, there are special +restrictions on the root page. Early in an index's life, when there is only +one page's worth of data, the root page contains an unorganized set of leaf +tuples. After the first page split has occurred, the root is required to +contain exactly one inner tuple. + +When the search traversal algorithm reaches an inner tuple, it chooses a set +of nodes to continue tree traverse in depth. If it reaches a leaf page it +scans a list of leaf tuples to find the ones that match the query. + +The insertion algorithm descends the tree similarly, except it must choose +just one node to descend to from each inner tuple. Insertion might also have +to modify the inner tuple before it can descend: it could add a new node, or +it could "split" the tuple to obtain a less-specific prefix that can match +the value to be inserted. If it's necessary to append a new leaf tuple to a +list and there is no free space on page, then SP-GiST creates a new inner +tuple and distributes leaf tuples into a set of lists on, perhaps, several +pages. + +Inner tuple consists of: + + optional prefix value - all successors must be consistent with it. + Example: + suffix tree - prefix value is a common prefix string + quad tree - centroid + k-d tree - one coordinate + + list of nodes, where node is a (label, pointer) pair. + Example of a label: a single character for suffix tree + +Leaf tuple consists of: + + a leaf value + Example: + suffix tree - the rest of string (postfix) + quad and k-d tree - the point itself + + ItemPointer to the heap + +INSERTION ALGORITHM + +Insertion algorithm is designed to keep the tree in a consistent state at +any moment. Here is a simplified insertion algorithm specification +(numbers refer to notes below): + + Start with the first tuple on the root page (1) + + loop: + if (page is leaf) then + if (enough space) + insert on page and exit (5) + else (7) + call PickSplitFn() (2) + end if + else + switch (chooseFn()) + case MatchNode - descend through selected node + case AddNode - add node and then retry chooseFn (3, 6) + case SplitTuple - split inner tuple to prefix and postfix, then + retry chooseFn with the prefix tuple (4, 6) + end if + +Notes: + +(1) Initially, we just dump leaf tuples into the root page until it is full; +then we split it. Once the root is not a leaf page, it can have only one +inner tuple, so as to keep the amount of free space on the root as large as +possible. Both of these rules are meant to postpone doing PickSplit on the +root for as long as possible, so that the topmost partitioning of the search +space is as good as we can easily make it. 
+ +(2) Current implementation allows to do picksplit and insert a new leaf tuple +in one operation, if the new list of leaf tuples fits on one page. It's +always possible for trees with small nodes like quad tree or k-d tree, but +suffix trees may require another picksplit. + +(3) Addition of node must keep size of inner tuple small enough to fit on a +page. After addition, inner tuple could become too large to be stored on +current page because of other tuples on page. In this case it will be moved +to another inner page (see notes about page management). When moving tuple to +another page, we can't change the numbers of other tuples on the page, else +we'd make downlink pointers to them invalid. To prevent that, SP-GiST leaves +a "placeholder" tuple, which can be reused later whenever another tuple is +added to the page. See also Concurrency and Vacuum sections below. Right now +only suffix trees could add a node to the tuple; quad trees and k-d trees +make all possible nodes at once in PickSplitFn() call. + +(4) Prefix value could only partially match a new value, so the SplitTuple +action allows breaking the current tree branch into upper and lower sections. +Another way to say it is that we can split the current inner tuple into +"prefix" and "postfix" parts, where the prefix part is able to match the +incoming new value. Consider example of insertion into a suffix tree. We use +the following notation, where tuple's id is just for discussion (no such id +is actually stored): + +inner tuple: {tuple id}(prefix string)[ comma separated list of node labels ] +leaf tuple: {tuple id}<value> + +Suppose we need to insert string 'www.gogo.com' into inner tuple + + {1}(www.google.com/)[a, i] + +The string does not match the prefix so we cannot descend. We must +split the inner tuple into two tuples: + + {2}(www.go)[o] - prefix tuple + | + {3}(gle.com/)[a,i] - postfix tuple + +On the next iteration of loop we find that 'www.gogo.com' matches the +prefix, but not any node label, so we add a node [g] to tuple {2}: + + NIL (no child exists yet) + | + {2}(www.go)[o, g] + | + {3}(gle.com/)[a,i] + +Now we can descend through the [g] node, which will cause us to update +the target string to just 'o.com'. Finally, we'll insert a leaf tuple +bearing that string: + + {4}<o.com> + | + {2}(www.go)[o, g] + | + {3}(gle.com/)[a,i] + +As we can see, the original tuple's node array moves to postfix tuple without +any changes. Note also that SP-GiST core assumes that prefix tuple is not +larger than old inner tuple. That allows us to store prefix tuple directly +in place of old inner tuple. SP-GiST core will try to store postfix tuple on +the same page if possible, but will use another page if there is not enough +free space (see notes 5 and 6). Currently, quad and k-d trees don't use this +feature, because they have no concept of a prefix being "inconsistent" with +any new value. They grow their depth only by PickSplitFn() call. + +(5) If pointer from node of parent is a NIL pointer, algorithm chooses a leaf +page to store on. At first, it tries to use the last-used leaf page with the +largest free space (which we track in each backend) to better utilize disk +space. If that's not large enough, then the algorithm allocates a new page. + +(6) Management of inner pages is very similar to management of leaf pages, +described in (5). + +(7) Actually, current implementation can move the whole list of leaf tuples +and a new tuple to another page, if the list is short enough. 
This improves +space utilization, but doesn't change the basis of the algorithm. + +CONCURRENCY + +While descending the tree, the insertion algorithm holds exclusive lock on +two tree levels at a time, ie both parent and child pages (parent and child +pages can be the same, see notes above). There is a possibility of deadlock +between two insertions if there are cross-referenced pages in different +branches. That is, if inner tuple on page M has a child on page N while +an inner tuple from another branch is on page N and has a child on page M, +then two insertions descending the two branches could deadlock. To prevent +deadlocks we introduce a concept of "triple parity" of pages: if inner tuple +is on page with BlockNumber N, then its child tuples should be placed on the +same page, or else on a page with BlockNumber M where (N+1) mod 3 == M mod 3. +This rule guarantees that tuples on page M will have no children on page N, +since (M+1) mod 3 != N mod 3. + +Insertion may also need to take locks on an additional inner and/or leaf page +to add tuples of the right type(s), when there's not enough room on the pages +it descended through. However, we don't care exactly which such page we add +to, so deadlocks can be avoided by conditionally locking the additional +buffers: if we fail to get lock on an additional page, just try another one. + +Search traversal algorithm is rather traditional. At each non-leaf level, it +share-locks the page, identifies which node(s) in the current inner tuple +need to be visited, and puts those addresses on a stack of pages to examine +later. It then releases lock on the current buffer before visiting the next +stack item. So only one page is locked at a time, and no deadlock is +possible. But instead, we have to worry about race conditions: by the time +we arrive at a pointed-to page, a concurrent insertion could have replaced +the target inner tuple (or leaf tuple chain) with data placed elsewhere. +To handle that, whenever the insertion algorithm changes a nonempty downlink +in an inner tuple, it places a "redirect tuple" in place of the lower-level +inner tuple or leaf-tuple chain head that the link formerly led to. Scans +(though not insertions) must be prepared to honor such redirects. Only a +scan that had already visited the parent level could possibly reach such a +redirect tuple, so we can remove redirects once all active transactions have +been flushed out of the system. + +DEAD TUPLES + +Tuples on leaf pages can be in one of four states: + +SPGIST_LIVE: normal, live pointer to a heap tuple. + +SPGIST_REDIRECT: placeholder that contains a link to another place in the +index. When a chain of leaf tuples has to be moved to another page, a +redirect tuple is inserted in place of the chain's head tuple. The parent +inner tuple's downlink is updated when this happens, but concurrent scans +might be "in flight" from the parent page to the child page (since they +release lock on the parent page before attempting to lock the child). +The redirect pointer serves to tell such a scan where to go. A redirect +pointer is only needed for as long as such concurrent scans could be in +progress. Eventually, it's converted into a PLACEHOLDER dead tuple by +VACUUM, and is then a candidate for replacement. Searches that find such +a tuple (which should never be part of a chain) should immediately proceed +to the other place, forgetting about the redirect tuple. 
Insertions that +reach such a tuple should raise error, since a valid downlink should never +point to such a tuple. + +SPGIST_DEAD: tuple is dead, but it cannot be removed or moved to a +different offset on the page because there is a link leading to it from +some inner tuple elsewhere in the index. (Such a tuple is never part of a +chain, since we don't need one unless there is nothing live left in its +chain.) Searches should ignore such entries. If an insertion action +arrives at such a tuple, it should either replace it in-place (if there's +room on the page to hold the desired new leaf tuple) or replace it with a +redirection pointer to wherever it puts the new leaf tuple. + +SPGIST_PLACEHOLDER: tuple is dead, and there are known to be no links to +it from elsewhere. When a live tuple is deleted or moved away, and not +replaced by a redirect pointer, it is replaced by a placeholder to keep +the offsets of later tuples on the same page from changing. Placeholders +can be freely replaced when adding a new tuple to the page, and also +VACUUM will delete any that are at the end of the range of valid tuple +offsets. Both searches and insertions should complain if a link from +elsewhere leads them to a placeholder tuple. + +When the root page is also a leaf, all its tuple should be in LIVE state; +there's no need for the others since there are no links and no need to +preserve offset numbers. + +Tuples on inner pages can be in LIVE, REDIRECT, or PLACEHOLDER states. +The REDIRECT state has the same function as on leaf pages, to send +concurrent searches to the place where they need to go after an inner +tuple is moved to another page. Expired REDIRECT pointers are converted +to PLACEHOLDER status by VACUUM, and are then candidates for replacement. +DEAD state is not currently possible, since VACUUM does not attempt to +remove unused inner tuples. + +VACUUM + +VACUUM (or more precisely, spgbulkdelete) performs a single sequential scan +over the entire index. On both leaf and inner pages, we can convert old +REDIRECT tuples into PLACEHOLDER status, and then remove any PLACEHOLDERs +that are at the end of the page (since they aren't needed to preserve the +offsets of any live tuples). On leaf pages, we scan for tuples that need +to be deleted because their heap TIDs match a vacuum target TID. + +If we find a deletable tuple that is not at the head of its chain, we +can simply replace it with a PLACEHOLDER, updating the chain links to +remove it from the chain. If it is at the head of its chain, but there's +at least one live tuple remaining in the chain, we move that live tuple +to the head tuple's offset, replacing it with a PLACEHOLDER to preserve +the offsets of other tuples. This keeps the parent inner tuple's downlink +valid. If we find ourselves deleting all live tuples in a chain, we +replace the head tuple with a DEAD tuple and the rest with PLACEHOLDERS. +The parent inner tuple's downlink thus points to the DEAD tuple, and the +rules explained in the previous section keep everything working. + +VACUUM doesn't know a-priori which tuples are heads of their chains, but +it can easily figure that out by constructing a predecessor array that's +the reverse map of the nextOffset links (ie, when we see tuple x links to +tuple y, we set predecessor[y] = x). Then head tuples are the ones with +no predecessor. + +spgbulkdelete also updates the index's free space map. 
+ +Currently, spgvacuumcleanup has nothing to do if spgbulkdelete was +performed; otherwise, it does an spgbulkdelete scan with an empty target +list, so as to clean up redirections and placeholders, update the free +space map, and gather statistics. + +LAST USED PAGE MANAGEMENT + +List of last used pages contains four pages - a leaf page and three inner +pages, one from each "triple parity" group. This list is stored between +calls on the index meta page, but updates are never WAL-logged to decrease +WAL traffic. Incorrect data on meta page isn't critical, because we could +allocate a new page at any moment. + +AUTHORS + + Teodor Sigaev <teodor@sigaev.ru> + Oleg Bartunov <oleg@sai.msu.su> diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c new file mode 100644 index 00000000000..4bb8dfa1509 --- /dev/null +++ b/src/backend/access/spgist/spgdoinsert.c @@ -0,0 +1,2065 @@ +/*------------------------------------------------------------------------- + * + * spgdoinsert.c + * implementation of insert algorithm + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgdoinsert.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/spgist_private.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" + + +/* + * SPPageDesc tracks all info about a page we are inserting into. In some + * situations it actually identifies a tuple, or even a specific node within + * an inner tuple. But any of the fields can be invalid. If the buffer + * field is valid, it implies we hold pin and exclusive lock on that buffer. + * page pointer should be valid exactly when buffer is. + */ +typedef struct SPPageDesc +{ + BlockNumber blkno; /* block number, or InvalidBlockNumber */ + Buffer buffer; /* page's buffer number, or InvalidBuffer */ + Page page; /* pointer to page buffer, or NULL */ + OffsetNumber offnum; /* offset of tuple, or InvalidOffsetNumber */ + int node; /* node number within inner tuple, or -1 */ +} SPPageDesc; + + +/* + * Set the item pointer in the nodeN'th entry in inner tuple tup. This + * is used to update the parent inner tuple's downlink after a move or + * split operation. + */ +void +updateNodeLink(SpGistInnerTuple tup, int nodeN, + BlockNumber blkno, OffsetNumber offset) +{ + int i; + SpGistNodeTuple node; + + SGITITERATE(tup, i, node) + { + if (i == nodeN) + { + ItemPointerSet(&node->t_tid, blkno, offset); + return; + } + } + + elog(ERROR, "failed to find requested node %d in SPGiST inner tuple", + nodeN); +} + +/* + * Form a new inner tuple containing one more node than the given one, with + * the specified label datum, inserted at offset "offset" in the node array. + * The new tuple's prefix is the same as the old one's. + * + * Note that the new node initially has an invalid downlink. We'll find a + * page to point it to later. 
+ */ +static SpGistInnerTuple +addNode(SpGistState *state, SpGistInnerTuple tuple, Datum label, int offset) +{ + SpGistNodeTuple node, + *nodes; + int i; + + /* if offset is negative, insert at end */ + if (offset < 0) + offset = tuple->nNodes; + else if (offset > tuple->nNodes) + elog(ERROR, "invalid offset for adding node to SPGiST inner tuple"); + + nodes = palloc(sizeof(SpGistNodeTuple) * (tuple->nNodes + 1)); + SGITITERATE(tuple, i, node) + { + if (i < offset) + nodes[i] = node; + else + nodes[i + 1] = node; + } + + nodes[offset] = spgFormNodeTuple(state, label, false); + + return spgFormInnerTuple(state, + (tuple->prefixSize > 0), + SGITDATUM(tuple, state), + tuple->nNodes + 1, + nodes); +} + +/* qsort comparator for sorting OffsetNumbers */ +static int +cmpOffsetNumbers(const void *a, const void *b) +{ + if (*(const OffsetNumber *) a == *(const OffsetNumber *) b) + return 0; + return (*(const OffsetNumber *) a > *(const OffsetNumber *) b) ? 1 : -1; +} + +/* + * Delete multiple tuples from an index page, preserving tuple offset numbers. + * + * The first tuple in the given list is replaced with a dead tuple of type + * "firststate" (REDIRECT/DEAD/PLACEHOLDER); the remaining tuples are replaced + * with dead tuples of type "reststate". If either firststate or reststate + * is REDIRECT, blkno/offnum specify where to link to. + * + * NB: this is used during WAL replay, so beware of trying to make it too + * smart. In particular, it shouldn't use "state" except for calling + * spgFormDeadTuple(). + */ +void +spgPageIndexMultiDelete(SpGistState *state, Page page, + OffsetNumber *itemnos, int nitems, + int firststate, int reststate, + BlockNumber blkno, OffsetNumber offnum) +{ + OffsetNumber firstItem; + OffsetNumber *sortednos; + SpGistDeadTuple tuple = NULL; + int i; + + if (nitems == 0) + return; /* nothing to do */ + + /* + * For efficiency we want to use PageIndexMultiDelete, which requires the + * targets to be listed in sorted order, so we have to sort the itemnos + * array. (This also greatly simplifies the math for reinserting the + * replacement tuples.) However, we must not scribble on the caller's + * array, so we have to make a copy. + */ + sortednos = (OffsetNumber *) palloc(sizeof(OffsetNumber) * nitems); + memcpy(sortednos, itemnos, sizeof(OffsetNumber) * nitems); + if (nitems > 1) + qsort(sortednos, nitems, sizeof(OffsetNumber), cmpOffsetNumbers); + + PageIndexMultiDelete(page, sortednos, nitems); + + firstItem = itemnos[0]; + + for (i = 0; i < nitems; i++) + { + OffsetNumber itemno = sortednos[i]; + int tupstate; + + tupstate = (itemno == firstItem) ? firststate : reststate; + if (tuple == NULL || tuple->tupstate != tupstate) + tuple = spgFormDeadTuple(state, tupstate, blkno, offnum); + + if (PageAddItem(page, (Item) tuple, tuple->size, + itemno, false, false) != itemno) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + tuple->size); + + if (tupstate == SPGIST_REDIRECT) + SpGistPageGetOpaque(page)->nRedirection++; + else if (tupstate == SPGIST_PLACEHOLDER) + SpGistPageGetOpaque(page)->nPlaceholder++; + } + + pfree(sortednos); +} + +/* + * Update the parent inner tuple's downlink, and mark the parent buffer + * dirty (this must be the last change to the parent page in the current + * WAL action). 
+ */ +static void +saveNodeLink(Relation index, SPPageDesc *parent, + BlockNumber blkno, OffsetNumber offnum) +{ + SpGistInnerTuple innerTuple; + + innerTuple = (SpGistInnerTuple) PageGetItem(parent->page, + PageGetItemId(parent->page, parent->offnum)); + + updateNodeLink(innerTuple, parent->node, blkno, offnum); + + MarkBufferDirty(parent->buffer); +} + +/* + * Add a leaf tuple to a leaf page where there is known to be room for it + */ +static void +addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple, + SPPageDesc *current, SPPageDesc *parent, bool isNew) +{ + XLogRecData rdata[4]; + spgxlogAddLeaf xlrec; + + xlrec.node = index->rd_node; + xlrec.blknoLeaf = current->blkno; + xlrec.newPage = isNew; + + /* these will be filled below as needed */ + xlrec.offnumLeaf = InvalidOffsetNumber; + xlrec.offnumHeadLeaf = InvalidOffsetNumber; + xlrec.blknoParent = InvalidBlockNumber; + xlrec.offnumParent = InvalidOffsetNumber; + xlrec.nodeI = 0; + + ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0); + /* we assume sizeof(xlrec) is at least int-aligned */ + ACCEPT_RDATA_DATA(leafTuple, leafTuple->size, 1); + ACCEPT_RDATA_BUFFER(current->buffer, 2); + + START_CRIT_SECTION(); + + if (current->offnum == InvalidOffsetNumber || + current->blkno == SPGIST_HEAD_BLKNO) + { + /* Tuple is not part of a chain */ + leafTuple->nextOffset = InvalidOffsetNumber; + current->offnum = SpGistPageAddNewItem(state, current->page, + (Item) leafTuple, leafTuple->size, + NULL, false); + + xlrec.offnumLeaf = current->offnum; + + /* Must update parent's downlink if any */ + if (parent->buffer != InvalidBuffer) + { + xlrec.blknoParent = parent->blkno; + xlrec.offnumParent = parent->offnum; + xlrec.nodeI = parent->node; + + saveNodeLink(index, parent, current->blkno, current->offnum); + + ACCEPT_RDATA_BUFFER(parent->buffer, 3); + } + } + else + { + /* + * Tuple must be inserted into existing chain. We mustn't change + * the chain's head address, but we don't need to chase the entire + * chain to put the tuple at the end; we can insert it second. + * + * Also, it's possible that the "chain" consists only of a DEAD tuple, + * in which case we should replace the DEAD tuple in-place. 
+ */ + SpGistLeafTuple head; + OffsetNumber offnum; + + head = (SpGistLeafTuple) PageGetItem(current->page, + PageGetItemId(current->page, current->offnum)); + if (head->tupstate == SPGIST_LIVE) + { + leafTuple->nextOffset = head->nextOffset; + offnum = SpGistPageAddNewItem(state, current->page, + (Item) leafTuple, leafTuple->size, + NULL, false); + + /* + * re-get head of list because it could have been moved on page, + * and set new second element + */ + head = (SpGistLeafTuple) PageGetItem(current->page, + PageGetItemId(current->page, current->offnum)); + head->nextOffset = offnum; + + xlrec.offnumLeaf = offnum; + xlrec.offnumHeadLeaf = current->offnum; + } + else if (head->tupstate == SPGIST_DEAD) + { + leafTuple->nextOffset = InvalidOffsetNumber; + PageIndexTupleDelete(current->page, current->offnum); + if (PageAddItem(current->page, + (Item) leafTuple, leafTuple->size, + current->offnum, false, false) != current->offnum) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + leafTuple->size); + + /* WAL replay distinguishes this case by equal offnums */ + xlrec.offnumLeaf = current->offnum; + xlrec.offnumHeadLeaf = current->offnum; + } + else + elog(ERROR, "unexpected SPGiST tuple state: %d", head->tupstate); + } + + MarkBufferDirty(current->buffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, rdata); + + PageSetLSN(current->page, recptr); + PageSetTLI(current->page, ThisTimeLineID); + + /* update parent only if we actually changed it */ + if (xlrec.blknoParent != InvalidBlockNumber) + { + PageSetLSN(parent->page, recptr); + PageSetTLI(parent->page, ThisTimeLineID); + } + } + + END_CRIT_SECTION(); +} + +/* + * Count the number and total size of leaf tuples in the chain starting at + * current->offnum. Return number into *nToSplit and total size as function + * result. + * + * Klugy special case when considering the root page (i.e., root is a leaf + * page, but we're about to split for the first time): return fake large + * values to force spgdoinsert() to take the doPickSplit rather than + * moveLeafs code path. moveLeafs is not prepared to deal with root page. + */ +static int +checkSplitConditions(Relation index, SpGistState *state, + SPPageDesc *current, int *nToSplit) +{ + int i, + n = 0, + totalSize = 0; + + if (current->blkno == SPGIST_HEAD_BLKNO) + { + /* return impossible values to force split */ + *nToSplit = BLCKSZ; + return BLCKSZ; + } + + i = current->offnum; + while (i != InvalidOffsetNumber) + { + SpGistLeafTuple it; + + Assert(i >= FirstOffsetNumber && + i <= PageGetMaxOffsetNumber(current->page)); + it = (SpGistLeafTuple) PageGetItem(current->page, + PageGetItemId(current->page, i)); + if (it->tupstate == SPGIST_LIVE) + { + n++; + totalSize += it->size + sizeof(ItemIdData); + } + else if (it->tupstate == SPGIST_DEAD) + { + /* We could see a DEAD tuple as first/only chain item */ + Assert(i == current->offnum); + Assert(it->nextOffset == InvalidOffsetNumber); + /* Don't count it in result, because it won't go to other page */ + } + else + elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate); + + i = it->nextOffset; + } + + *nToSplit = n; + + return totalSize; +} + +/* + * current points to a leaf-tuple chain that we wanted to add newLeafTuple to, + * but the chain has to be moved because there's not enough room to add + * newLeafTuple to its page. We use this method when the chain contains + * very little data so a split would be inefficient. 
We are sure we can + * fit the chain plus newLeafTuple on one other page. + */ +static void +moveLeafs(Relation index, SpGistState *state, + SPPageDesc *current, SPPageDesc *parent, + SpGistLeafTuple newLeafTuple) +{ + int i, + nDelete, + nInsert, + size; + Buffer nbuf; + Page npage; + SpGistLeafTuple it; + OffsetNumber r = InvalidOffsetNumber, + startOffset = InvalidOffsetNumber; + bool replaceDead = false; + OffsetNumber *toDelete; + OffsetNumber *toInsert; + BlockNumber nblkno; + XLogRecData rdata[7]; + spgxlogMoveLeafs xlrec; + char *leafdata, + *leafptr; + + /* This doesn't work on root page */ + Assert(parent->buffer != InvalidBuffer); + Assert(parent->buffer != current->buffer); + + /* Locate the tuples to be moved, and count up the space needed */ + i = PageGetMaxOffsetNumber(current->page); + toDelete = (OffsetNumber *) palloc(sizeof(OffsetNumber) * i); + toInsert = (OffsetNumber *) palloc(sizeof(OffsetNumber) * (i + 1)); + + size = newLeafTuple->size + sizeof(ItemIdData); + + nDelete = 0; + i = current->offnum; + while (i != InvalidOffsetNumber) + { + SpGistLeafTuple it; + + Assert(i >= FirstOffsetNumber && + i <= PageGetMaxOffsetNumber(current->page)); + it = (SpGistLeafTuple) PageGetItem(current->page, + PageGetItemId(current->page, i)); + + if (it->tupstate == SPGIST_LIVE) + { + toDelete[nDelete] = i; + size += it->size + sizeof(ItemIdData); + nDelete++; + } + else if (it->tupstate == SPGIST_DEAD) + { + /* We could see a DEAD tuple as first/only chain item */ + Assert(i == current->offnum); + Assert(it->nextOffset == InvalidOffsetNumber); + /* We don't want to move it, so don't count it in size */ + toDelete[nDelete] = i; + nDelete++; + replaceDead = true; + } + else + elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate); + + i = it->nextOffset; + } + + /* Find a leaf page that will hold them */ + nbuf = SpGistGetBuffer(index, GBUF_LEAF, size, &xlrec.newPage); + npage = BufferGetPage(nbuf); + nblkno = BufferGetBlockNumber(nbuf); + Assert(nblkno != current->blkno); + + /* prepare WAL info */ + xlrec.node = index->rd_node; + STORE_STATE(state, xlrec.stateSrc); + + xlrec.blknoSrc = current->blkno; + xlrec.blknoDst = nblkno; + xlrec.nMoves = nDelete; + xlrec.replaceDead = replaceDead; + + xlrec.blknoParent = parent->blkno; + xlrec.offnumParent = parent->offnum; + xlrec.nodeI = parent->node; + + leafdata = leafptr = palloc(size); + + START_CRIT_SECTION(); + + /* copy all the old tuples to new page, unless they're dead */ + nInsert = 0; + if (!replaceDead) + { + for (i = 0; i < nDelete; i++) + { + it = (SpGistLeafTuple) PageGetItem(current->page, + PageGetItemId(current->page, toDelete[i])); + Assert(it->tupstate == SPGIST_LIVE); + + /* + * Update chain link (notice the chain order gets reversed, but we + * don't care). We're modifying the tuple on the source page + * here, but it's okay since we're about to delete it. 
+ */ + it->nextOffset = r; + + r = SpGistPageAddNewItem(state, npage, (Item) it, it->size, + &startOffset, false); + + toInsert[nInsert] = r; + nInsert++; + + /* save modified tuple into leafdata as well */ + memcpy(leafptr, it, it->size); + leafptr += it->size; + } + } + + /* add the new tuple as well */ + newLeafTuple->nextOffset = r; + r = SpGistPageAddNewItem(state, npage, + (Item) newLeafTuple, newLeafTuple->size, + &startOffset, false); + toInsert[nInsert] = r; + nInsert++; + memcpy(leafptr, newLeafTuple, newLeafTuple->size); + leafptr += newLeafTuple->size; + + /* + * Now delete the old tuples, leaving a redirection pointer behind for + * the first one, unless we're doing an index build; in which case there + * can't be any concurrent scan so we need not provide a redirect. + */ + spgPageIndexMultiDelete(state, current->page, toDelete, nDelete, + state->isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT, + SPGIST_PLACEHOLDER, + nblkno, r); + + /* Update parent's downlink and mark parent page dirty */ + saveNodeLink(index, parent, nblkno, r); + + /* Mark the leaf pages too */ + MarkBufferDirty(current->buffer); + MarkBufferDirty(nbuf); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + ACCEPT_RDATA_DATA(&xlrec, MAXALIGN(sizeof(xlrec)), 0); + ACCEPT_RDATA_DATA(toDelete, MAXALIGN(sizeof(OffsetNumber) * nDelete), 1); + ACCEPT_RDATA_DATA(toInsert, MAXALIGN(sizeof(OffsetNumber) * nInsert), 2); + ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3); + ACCEPT_RDATA_BUFFER(current->buffer, 4); + ACCEPT_RDATA_BUFFER(nbuf, 5); + ACCEPT_RDATA_BUFFER(parent->buffer, 6); + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, rdata); + + PageSetLSN(current->page, recptr); + PageSetTLI(current->page, ThisTimeLineID); + PageSetLSN(npage, recptr); + PageSetTLI(npage, ThisTimeLineID); + PageSetLSN(parent->page, recptr); + PageSetTLI(parent->page, ThisTimeLineID); + } + + END_CRIT_SECTION(); + + /* Update local free-space cache and release new buffer */ + SpGistSetLastUsedPage(index, nbuf); + UnlockReleaseBuffer(nbuf); +} + +/* + * Update previously-created redirection tuple with appropriate destination + * + * We use this when it's not convenient to know the destination first. + * The tuple should have been made with the "impossible" destination of + * the metapage. + */ +static void +setRedirectionTuple(SPPageDesc *current, OffsetNumber position, + BlockNumber blkno, OffsetNumber offnum) +{ + SpGistDeadTuple dt; + + dt = (SpGistDeadTuple) PageGetItem(current->page, + PageGetItemId(current->page, position)); + Assert(dt->tupstate == SPGIST_REDIRECT); + Assert(ItemPointerGetBlockNumber(&dt->pointer) == SPGIST_METAPAGE_BLKNO); + ItemPointerSet(&dt->pointer, blkno, offnum); +} + +/* + * Test to see if the user-defined picksplit function failed to do its job, + * ie, it put all the leaf tuples into the same node. + * If so, randomly divide the tuples into several nodes (all with the same + * label) and return TRUE to select allTheSame mode for this inner tuple. + * + * If we know that the leaf tuples wouldn't all fit on one page, then we + * exclude the last tuple (which is the incoming new tuple that forced a split) + * from the check to see if more than one node is used. The reason for this + * is that if the existing tuples are put into only one chain, then even if + * we move them all to an empty page, there would still not be room for the + * new tuple, so we'd get into an infinite loop of picksplit attempts. 
+ * Forcing allTheSame mode dodges this problem by ensuring the old tuples will + * be split across pages. (Exercise for the reader: figure out why this + * fixes the problem even when there is only one old tuple.) + */ +static bool +checkAllTheSame(spgPickSplitIn *in, spgPickSplitOut *out, bool tooBig, + bool *includeNew) +{ + int theNode; + int limit; + int i; + + /* For the moment, assume we can include the new leaf tuple */ + *includeNew = true; + + /* If there's only the new leaf tuple, don't select allTheSame mode */ + if (in->nTuples <= 1) + return false; + + /* If tuple set doesn't fit on one page, ignore the new tuple in test */ + limit = tooBig ? in->nTuples - 1 : in->nTuples; + + /* Check to see if more than one node is populated */ + theNode = out->mapTuplesToNodes[0]; + for (i = 1; i < limit; i++) + { + if (out->mapTuplesToNodes[i] != theNode) + return false; + } + + /* Nope, so override the picksplit function's decisions */ + + /* If the new tuple is in its own node, it can't be included in split */ + if (tooBig && out->mapTuplesToNodes[in->nTuples - 1] != theNode) + *includeNew = false; + + out->nNodes = 8; /* arbitrary number of child nodes */ + + /* Random assignment of tuples to nodes (note we include new tuple) */ + for (i = 0; i < in->nTuples; i++) + out->mapTuplesToNodes[i] = i % out->nNodes; + + /* The opclass may not use node labels, but if it does, duplicate 'em */ + if (out->nodeLabels) + { + Datum theLabel = out->nodeLabels[theNode]; + + out->nodeLabels = (Datum *) palloc(sizeof(Datum) * out->nNodes); + for (i = 0; i < out->nNodes; i++) + out->nodeLabels[i] = theLabel; + } + + /* We don't touch the prefix or the leaf tuple datum assignments */ + + return true; +} + +/* + * current points to a leaf-tuple chain that we wanted to add newLeafTuple to, + * but the chain has to be split because there's not enough room to add + * newLeafTuple to its page. + * + * This function splits the leaf tuple set according to picksplit's rules, + * creating one or more new chains that are spread across the current page + * and an additional leaf page (we assume that two leaf pages will be + * sufficient). A new inner tuple is created, and the parent downlink + * pointer is updated to point to that inner tuple instead of the leaf chain. + * + * On exit, current contains the address of the new inner tuple. + * + * Returns true if we successfully inserted newLeafTuple during this function, + * false if caller still has to do it (meaning another picksplit operation is + * probably needed). Failure could occur if the picksplit result is fairly + * unbalanced, or if newLeafTuple is just plain too big to fit on a page. + * Because we force the picksplit result to be at least two chains, each + * cycle will get rid of at least one leaf tuple from the chain, so the loop + * will eventually terminate if lack of balance is the issue. If the tuple + * is too big, we assume that repeated picksplit operations will eventually + * make it small enough by repeated prefix-stripping. A broken opclass could + * make this an infinite loop, though. 
+ */ +static bool +doPickSplit(Relation index, SpGistState *state, + SPPageDesc *current, SPPageDesc *parent, + SpGistLeafTuple newLeafTuple, int level, bool isNew) +{ + bool insertedNew = false; + spgPickSplitIn in; + spgPickSplitOut out; + bool includeNew; + int i, + max, + n; + SpGistInnerTuple innerTuple; + SpGistNodeTuple node, + *nodes; + Buffer newInnerBuffer, + newLeafBuffer; + ItemPointerData *heapPtrs; + uint8 *leafPageSelect; + int *leafSizes; + OffsetNumber *toDelete; + OffsetNumber *toInsert; + OffsetNumber redirectTuplePos = InvalidOffsetNumber; + OffsetNumber startOffsets[2]; + SpGistLeafTuple *newLeafs; + int spaceToDelete; + int currentFreeSpace; + int totalLeafSizes; + bool allTheSame; + XLogRecData rdata[10]; + int nRdata; + spgxlogPickSplit xlrec; + char *leafdata, + *leafptr; + SPPageDesc saveCurrent; + int nToDelete, + nToInsert, + maxToInclude; + + in.level = level; + + /* + * Allocate per-leaf-tuple work arrays with max possible size + */ + max = PageGetMaxOffsetNumber(current->page); + n = max + 1; + in.datums = (Datum *) palloc(sizeof(Datum) * n); + heapPtrs = (ItemPointerData *) palloc(sizeof(ItemPointerData) * n); + toDelete = (OffsetNumber *) palloc(sizeof(OffsetNumber) * n); + toInsert = (OffsetNumber *) palloc(sizeof(OffsetNumber) * n); + newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n); + leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n); + + xlrec.node = index->rd_node; + STORE_STATE(state, xlrec.stateSrc); + + /* + * Form list of leaf tuples which will be distributed as split result; + * also, count up the amount of space that will be freed from current. + * (Note that in the non-root case, we won't actually delete the old + * tuples, only replace them with redirects or placeholders.) + */ + nToInsert = 0; + nToDelete = 0; + spaceToDelete = 0; + if (current->blkno == SPGIST_HEAD_BLKNO) + { + /* + * We are splitting the root (which up to now is also a leaf page). + * Its tuples are not linked, so scan sequentially to get them all. + * We ignore the original value of current->offnum. 
+ */ + for (i = FirstOffsetNumber; i <= max; i++) + { + SpGistLeafTuple it; + + it = (SpGistLeafTuple) PageGetItem(current->page, + PageGetItemId(current->page, i)); + if (it->tupstate == SPGIST_LIVE) + { + in.datums[nToInsert] = SGLTDATUM(it, state); + heapPtrs[nToInsert] = it->heapPtr; + nToInsert++; + toDelete[nToDelete] = i; + nToDelete++; + /* we will delete the tuple altogether, so count full space */ + spaceToDelete += it->size + sizeof(ItemIdData); + } + else /* tuples on root should be live */ + elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate); + } + } + else + { + /* Normal case, just collect the leaf tuples in the chain */ + i = current->offnum; + while (i != InvalidOffsetNumber) + { + SpGistLeafTuple it; + + Assert(i >= FirstOffsetNumber && i <= max); + it = (SpGistLeafTuple) PageGetItem(current->page, + PageGetItemId(current->page, i)); + if (it->tupstate == SPGIST_LIVE) + { + in.datums[nToInsert] = SGLTDATUM(it, state); + heapPtrs[nToInsert] = it->heapPtr; + nToInsert++; + toDelete[nToDelete] = i; + nToDelete++; + /* we will not delete the tuple, only replace with dead */ + Assert(it->size >= SGDTSIZE); + spaceToDelete += it->size - SGDTSIZE; + } + else if (it->tupstate == SPGIST_DEAD) + { + /* We could see a DEAD tuple as first/only chain item */ + Assert(i == current->offnum); + Assert(it->nextOffset == InvalidOffsetNumber); + toDelete[nToDelete] = i; + nToDelete++; + /* replacing it with redirect will save no space */ + } + else + elog(ERROR, "unexpected SPGiST tuple state: %d", it->tupstate); + + i = it->nextOffset; + } + } + in.nTuples = nToInsert; + + /* + * We may not actually insert new tuple because another picksplit may be + * necessary due to too large value, but we will try to to allocate enough + * space to include it; and in any case it has to be included in the input + * for the picksplit function. So don't increment nToInsert yet. + */ + in.datums[in.nTuples] = SGLTDATUM(newLeafTuple, state); + heapPtrs[in.nTuples] = newLeafTuple->heapPtr; + in.nTuples++; + + /* + * Perform split using user-defined method. + */ + memset(&out, 0, sizeof(out)); + + FunctionCall2Coll(&state->picksplitFn, + index->rd_indcollation[0], + PointerGetDatum(&in), + PointerGetDatum(&out)); + + /* + * Form new leaf tuples and count up the total space needed. + */ + totalLeafSizes = 0; + for (i = 0; i < in.nTuples; i++) + { + newLeafs[i] = spgFormLeafTuple(state, heapPtrs + i, + out.leafTupleDatums[i]); + totalLeafSizes += newLeafs[i]->size + sizeof(ItemIdData); + } + + /* + * Check to see if the picksplit function failed to separate the values, + * ie, it put them all into the same child node. If so, select allTheSame + * mode and create a random split instead. See comments for + * checkAllTheSame as to why we need to know if the new leaf tuples could + * fit on one page. + */ + allTheSame = checkAllTheSame(&in, &out, + totalLeafSizes > SPGIST_PAGE_CAPACITY, + &includeNew); + + /* + * If checkAllTheSame decided we must exclude the new tuple, don't + * consider it any further. + */ + if (includeNew) + maxToInclude = in.nTuples; + else + { + maxToInclude = in.nTuples - 1; + totalLeafSizes -= newLeafs[in.nTuples - 1]->size + sizeof(ItemIdData); + } + + /* + * Allocate per-node work arrays. Since checkAllTheSame could replace + * out.nNodes with a value larger than the number of tuples on the input + * page, we can't allocate these arrays before here. 
+ */ + nodes = (SpGistNodeTuple *) palloc(sizeof(SpGistNodeTuple) * out.nNodes); + leafSizes = (int *) palloc0(sizeof(int) * out.nNodes); + + /* + * Form nodes of inner tuple and inner tuple itself + */ + for (i = 0; i < out.nNodes; i++) + { + Datum label = (Datum) 0; + bool isnull = (out.nodeLabels == NULL); + + if (!isnull) + label = out.nodeLabels[i]; + nodes[i] = spgFormNodeTuple(state, label, isnull); + } + innerTuple = spgFormInnerTuple(state, + out.hasPrefix, out.prefixDatum, + out.nNodes, nodes); + innerTuple->allTheSame = allTheSame; + + /* + * Update nodes[] array to point into the newly formed innerTuple, so + * that we can adjust their downlinks below. + */ + SGITITERATE(innerTuple, i, node) + { + nodes[i] = node; + } + + /* + * Re-scan new leaf tuples and count up the space needed under each node. + */ + for (i = 0; i < maxToInclude; i++) + { + n = out.mapTuplesToNodes[i]; + if (n < 0 || n >= out.nNodes) + elog(ERROR, "inconsistent result of SPGiST picksplit function"); + leafSizes[n] += newLeafs[i]->size + sizeof(ItemIdData); + } + + /* + * To perform the split, we must insert a new inner tuple, which can't + * go on a leaf page; and unless we are splitting the root page, we + * must then update the parent tuple's downlink to point to the inner + * tuple. If there is room, we'll put the new inner tuple on the same + * page as the parent tuple, otherwise we need another non-leaf buffer. + * But if the parent page is the root, we can't add the new inner tuple + * there, because the root page must have only one inner tuple. + */ + xlrec.initInner = false; + if (parent->buffer != InvalidBuffer && + parent->blkno != SPGIST_HEAD_BLKNO && + (SpGistPageGetFreeSpace(parent->page, 1) >= + innerTuple->size + sizeof(ItemIdData))) + { + /* New inner tuple will fit on parent page */ + newInnerBuffer = parent->buffer; + } + else if (parent->buffer != InvalidBuffer) + { + /* Send tuple to page with next triple parity (see README) */ + newInnerBuffer = SpGistGetBuffer(index, + GBUF_INNER_PARITY(parent->blkno + 1), + innerTuple->size + sizeof(ItemIdData), + &xlrec.initInner); + } + else + { + /* Root page split ... inner tuple will go to root page */ + newInnerBuffer = InvalidBuffer; + } + + /*---------- + * Because a WAL record can't involve more than four buffers, we can + * only afford to deal with two leaf pages in each picksplit action, + * ie the current page and at most one other. + * + * The new leaf tuples converted from the existing ones should require + * the same or less space, and therefore should all fit onto one page + * (although that's not necessarily the current page, since we can't + * delete the old tuples but only replace them with placeholders). + * However, the incoming new tuple might not also fit, in which case + * we might need another picksplit cycle to reduce it some more. + * + * If there's not room to put everything back onto the current page, + * then we decide on a per-node basis which tuples go to the new page. + * (We do it like that because leaf tuple chains can't cross pages, + * so we must place all leaf tuples belonging to the same parent node + * on the same page.) + * + * If we are splitting the root page (turning it from a leaf page into an + * inner page), then no leaf tuples can go back to the current page; they + * must all go somewhere else. 
+ *---------- + */ + if (current->blkno != SPGIST_HEAD_BLKNO) + currentFreeSpace = PageGetExactFreeSpace(current->page) + spaceToDelete; + else + currentFreeSpace = 0; /* prevent assigning any tuples to current */ + + xlrec.initDest = false; + + if (totalLeafSizes <= currentFreeSpace) + { + /* All the leaf tuples will fit on current page */ + newLeafBuffer = InvalidBuffer; + /* mark new leaf tuple as included in insertions, if allowed */ + if (includeNew) + { + nToInsert++; + insertedNew = true; + } + for (i = 0; i < nToInsert; i++) + leafPageSelect[i] = 0; /* signifies current page */ + } + else if (in.nTuples == 1 && totalLeafSizes > SPGIST_PAGE_CAPACITY) + { + /* + * We're trying to split up a long value by repeated suffixing, but + * it's not going to fit yet. Don't bother allocating a second leaf + * buffer that we won't be able to use. + */ + newLeafBuffer = InvalidBuffer; + Assert(includeNew); + Assert(nToInsert == 0); + } + else + { + /* We will need another leaf page */ + uint8 *nodePageSelect; + int curspace; + int newspace; + + newLeafBuffer = SpGistGetBuffer(index, GBUF_LEAF, + Min(totalLeafSizes, + SPGIST_PAGE_CAPACITY), + &xlrec.initDest); + /* + * Attempt to assign node groups to the two pages. We might fail to + * do so, even if totalLeafSizes is less than the available space, + * because we can't split a group across pages. + */ + nodePageSelect = (uint8 *) palloc(sizeof(uint8) * out.nNodes); + + curspace = currentFreeSpace; + newspace = PageGetExactFreeSpace(BufferGetPage(newLeafBuffer)); + for (i = 0; i < out.nNodes; i++) + { + if (leafSizes[i] <= curspace) + { + nodePageSelect[i] = 0; /* signifies current page */ + curspace -= leafSizes[i]; + } + else + { + nodePageSelect[i] = 1; /* signifies new leaf page */ + newspace -= leafSizes[i]; + } + } + if (curspace >= 0 && newspace >= 0) + { + /* Successful assignment, so we can include the new leaf tuple */ + if (includeNew) + { + nToInsert++; + insertedNew = true; + } + } + else if (includeNew) + { + /* We must exclude the new leaf tuple from the split */ + int nodeOfNewTuple = out.mapTuplesToNodes[in.nTuples - 1]; + + leafSizes[nodeOfNewTuple] -= + newLeafs[in.nTuples - 1]->size + sizeof(ItemIdData); + + /* Repeat the node assignment process --- should succeed now */ + curspace = currentFreeSpace; + newspace = PageGetExactFreeSpace(BufferGetPage(newLeafBuffer)); + for (i = 0; i < out.nNodes; i++) + { + if (leafSizes[i] <= curspace) + { + nodePageSelect[i] = 0; /* signifies current page */ + curspace -= leafSizes[i]; + } + else + { + nodePageSelect[i] = 1; /* signifies new leaf page */ + newspace -= leafSizes[i]; + } + } + if (curspace < 0 || newspace < 0) + elog(ERROR, "failed to divide leaf tuple groups across pages"); + } + else + { + /* oops, we already excluded new tuple ... 
should not get here */ + elog(ERROR, "failed to divide leaf tuple groups across pages"); + } + /* Expand the per-node assignments to be shown per leaf tuple */ + for (i = 0; i < nToInsert; i++) + { + n = out.mapTuplesToNodes[i]; + leafPageSelect[i] = nodePageSelect[n]; + } + } + + /* Start preparing WAL record */ + xlrec.blknoSrc = current->blkno; + xlrec.blknoDest = InvalidBlockNumber; + xlrec.nDelete = 0; + xlrec.initSrc = isNew; + + leafdata = leafptr = (char *) palloc(totalLeafSizes); + + ACCEPT_RDATA_DATA(&xlrec, MAXALIGN(sizeof(xlrec)), 0); + ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, 1); + nRdata = 2; + + /* Here we begin making the changes to the target pages */ + START_CRIT_SECTION(); + + /* + * Delete old leaf tuples from current buffer, except when we're splitting + * the root; in that case there's no need because we'll re-init the page + * below. We do this first to make room for reinserting new leaf tuples. + */ + if (current->blkno != SPGIST_HEAD_BLKNO) + { + /* + * Init buffer instead of deleting individual tuples, but only if + * there aren't any other live tuples and only during build; otherwise + * we need to set a redirection tuple for concurrent scans. + */ + if (state->isBuild && + nToDelete + SpGistPageGetOpaque(current->page)->nPlaceholder == + PageGetMaxOffsetNumber(current->page)) + { + SpGistInitBuffer(current->buffer, SPGIST_LEAF); + xlrec.initSrc = true; + } + else if (isNew) + { + /* don't expose the freshly init'd buffer as a backup block */ + Assert(nToDelete == 0); + } + else + { + xlrec.nDelete = nToDelete; + ACCEPT_RDATA_DATA(toDelete, + MAXALIGN(sizeof(OffsetNumber) * nToDelete), + nRdata); + nRdata++; + ACCEPT_RDATA_BUFFER(current->buffer, nRdata); + nRdata++; + + if (!state->isBuild) + { + /* + * Need to create redirect tuple (it will point to new inner + * tuple) but right now the new tuple's location is not known + * yet. So, set the redirection pointer to "impossible" value + * and remember its position to update tuple later. + */ + if (nToDelete > 0) + redirectTuplePos = toDelete[0]; + spgPageIndexMultiDelete(state, current->page, + toDelete, nToDelete, + SPGIST_REDIRECT, + SPGIST_PLACEHOLDER, + SPGIST_METAPAGE_BLKNO, + FirstOffsetNumber); + } + else + { + /* + * During index build there is not concurrent searches, so we + * don't need to create redirection tuple. + */ + spgPageIndexMultiDelete(state, current->page, + toDelete, nToDelete, + SPGIST_PLACEHOLDER, + SPGIST_PLACEHOLDER, + InvalidBlockNumber, + InvalidOffsetNumber); + } + } + } + + /* + * Put leaf tuples on proper pages, and update downlinks in innerTuple's + * nodes. + */ + startOffsets[0] = startOffsets[1] = InvalidOffsetNumber; + for (i = 0; i < nToInsert; i++) + { + SpGistLeafTuple it = newLeafs[i]; + Buffer leafBuffer; + BlockNumber leafBlock; + OffsetNumber newoffset; + + /* Which page is it going to? */ + leafBuffer = leafPageSelect[i] ? newLeafBuffer : current->buffer; + leafBlock = BufferGetBlockNumber(leafBuffer); + + /* Link tuple into correct chain for its node */ + n = out.mapTuplesToNodes[i]; + + if (ItemPointerIsValid(&nodes[n]->t_tid)) + { + Assert(ItemPointerGetBlockNumber(&nodes[n]->t_tid) == leafBlock); + it->nextOffset = ItemPointerGetOffsetNumber(&nodes[n]->t_tid); + } + else + it->nextOffset = InvalidOffsetNumber; + + /* Insert it on page */ + newoffset = SpGistPageAddNewItem(state, BufferGetPage(leafBuffer), + (Item) it, it->size, + &startOffsets[leafPageSelect[i]], + false); + toInsert[i] = newoffset; + + /* ... 
and complete the chain linking */ + ItemPointerSet(&nodes[n]->t_tid, leafBlock, newoffset); + + /* Also copy leaf tuple into WAL data */ + memcpy(leafptr, newLeafs[i], newLeafs[i]->size); + leafptr += newLeafs[i]->size; + } + + /* + * We're done modifying the other leaf buffer (if any), so mark it dirty. + * current->buffer will be marked below, after we're entirely done + * modifying it. + */ + if (newLeafBuffer != InvalidBuffer) + { + MarkBufferDirty(newLeafBuffer); + /* also save block number for WAL */ + xlrec.blknoDest = BufferGetBlockNumber(newLeafBuffer); + if (!xlrec.initDest) + { + ACCEPT_RDATA_BUFFER(newLeafBuffer, nRdata); + nRdata++; + } + } + + xlrec.nInsert = nToInsert; + ACCEPT_RDATA_DATA(toInsert, + MAXALIGN(sizeof(OffsetNumber) * nToInsert), + nRdata); + nRdata++; + ACCEPT_RDATA_DATA(leafPageSelect, + MAXALIGN(sizeof(uint8) * nToInsert), + nRdata); + nRdata++; + ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata); + nRdata++; + + /* Remember current buffer, since we're about to change "current" */ + saveCurrent = *current; + + /* + * Store the new innerTuple + */ + if (newInnerBuffer == parent->buffer && newInnerBuffer != InvalidBuffer) + { + /* + * new inner tuple goes to parent page + */ + Assert(current->buffer != parent->buffer); + + /* Repoint "current" at the new inner tuple */ + current->blkno = parent->blkno; + current->buffer = parent->buffer; + current->page = parent->page; + xlrec.blknoInner = current->blkno; + xlrec.offnumInner = current->offnum = + SpGistPageAddNewItem(state, current->page, + (Item) innerTuple, innerTuple->size, + NULL, false); + + /* + * Update parent node link and mark parent page dirty + */ + xlrec.blknoParent = parent->blkno; + xlrec.offnumParent = parent->offnum; + xlrec.nodeI = parent->node; + saveNodeLink(index, parent, current->blkno, current->offnum); + + ACCEPT_RDATA_BUFFER(parent->buffer, nRdata); + nRdata++; + + /* + * Update redirection link (in old current buffer) + */ + if (redirectTuplePos != InvalidOffsetNumber) + setRedirectionTuple(&saveCurrent, redirectTuplePos, + current->blkno, current->offnum); + + /* Done modifying old current buffer, mark it dirty */ + MarkBufferDirty(saveCurrent.buffer); + } + else if (parent->buffer != InvalidBuffer) + { + /* + * new inner tuple will be stored on a new page + */ + Assert(newInnerBuffer != InvalidBuffer); + + /* Repoint "current" at the new inner tuple */ + current->buffer = newInnerBuffer; + current->blkno = BufferGetBlockNumber(current->buffer); + current->page = BufferGetPage(current->buffer); + xlrec.blknoInner = current->blkno; + xlrec.offnumInner = current->offnum = + SpGistPageAddNewItem(state, current->page, + (Item) innerTuple, innerTuple->size, + NULL, false); + + /* Done modifying new current buffer, mark it dirty */ + MarkBufferDirty(current->buffer); + + /* + * Update parent node link and mark parent page dirty + */ + xlrec.blknoParent = parent->blkno; + xlrec.offnumParent = parent->offnum; + xlrec.nodeI = parent->node; + saveNodeLink(index, parent, current->blkno, current->offnum); + + ACCEPT_RDATA_BUFFER(current->buffer, nRdata); + nRdata++; + ACCEPT_RDATA_BUFFER(parent->buffer, nRdata); + nRdata++; + + /* + * Update redirection link (in old current buffer) + */ + if (redirectTuplePos != InvalidOffsetNumber) + setRedirectionTuple(&saveCurrent, redirectTuplePos, + current->blkno, current->offnum); + + /* Done modifying old current buffer, mark it dirty */ + MarkBufferDirty(saveCurrent.buffer); + } + else + { + /* + * Splitting root page, which was a leaf but now 
becomes inner page + * (and so "current" continues to point at it) + */ + Assert(current->blkno == SPGIST_HEAD_BLKNO); + Assert(redirectTuplePos == InvalidOffsetNumber); + + SpGistInitBuffer(current->buffer, 0); + xlrec.initInner = true; + + xlrec.blknoInner = current->blkno; + xlrec.offnumInner = current->offnum = + PageAddItem(current->page, (Item) innerTuple, innerTuple->size, + InvalidOffsetNumber, false, false); + if (current->offnum != FirstOffsetNumber) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + innerTuple->size); + + /* No parent link to update, nor redirection to do */ + xlrec.blknoParent = InvalidBlockNumber; + xlrec.offnumParent = InvalidOffsetNumber; + xlrec.nodeI = 0; + + /* Done modifying new current buffer, mark it dirty */ + MarkBufferDirty(current->buffer); + + /* saveCurrent doesn't represent a different buffer */ + saveCurrent.buffer = InvalidBuffer; + } + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + /* Issue the WAL record */ + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, rdata); + + /* Update page LSNs on all affected pages */ + if (newLeafBuffer != InvalidBuffer) + { + Page page = BufferGetPage(newLeafBuffer); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + if (saveCurrent.buffer != InvalidBuffer) + { + Page page = BufferGetPage(saveCurrent.buffer); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + PageSetLSN(current->page, recptr); + PageSetTLI(current->page, ThisTimeLineID); + + if (parent->buffer != InvalidBuffer) + { + PageSetLSN(parent->page, recptr); + PageSetTLI(parent->page, ThisTimeLineID); + } + } + + END_CRIT_SECTION(); + + /* Update local free-space cache and unlock buffers */ + if (newLeafBuffer != InvalidBuffer) + { + SpGistSetLastUsedPage(index, newLeafBuffer); + UnlockReleaseBuffer(newLeafBuffer); + } + if (saveCurrent.buffer != InvalidBuffer) + { + SpGistSetLastUsedPage(index, saveCurrent.buffer); + UnlockReleaseBuffer(saveCurrent.buffer); + } + + return insertedNew; +} + +/* + * spgMatchNode action: descend to N'th child node of current inner tuple + */ +static void +spgMatchNodeAction(Relation index, SpGistState *state, + SpGistInnerTuple innerTuple, + SPPageDesc *current, SPPageDesc *parent, int nodeN) +{ + int i; + SpGistNodeTuple node; + + /* Release previous parent buffer if any */ + if (parent->buffer != InvalidBuffer && + parent->buffer != current->buffer) + { + SpGistSetLastUsedPage(index, parent->buffer); + UnlockReleaseBuffer(parent->buffer); + } + + /* Repoint parent to specified node of current inner tuple */ + parent->blkno = current->blkno; + parent->buffer = current->buffer; + parent->page = current->page; + parent->offnum = current->offnum; + parent->node = nodeN; + + /* Locate that node */ + SGITITERATE(innerTuple, i, node) + { + if (i == nodeN) + break; + } + + if (i != nodeN) + elog(ERROR, "failed to find requested node %d in SPGiST inner tuple", + nodeN); + + /* Point current to the downlink location, if any */ + if (ItemPointerIsValid(&node->t_tid)) + { + current->blkno = ItemPointerGetBlockNumber(&node->t_tid); + current->offnum = ItemPointerGetOffsetNumber(&node->t_tid); + } + else + { + /* Downlink is empty, so we'll need to find a new page */ + current->blkno = InvalidBlockNumber; + current->offnum = InvalidOffsetNumber; + } + + current->buffer = InvalidBuffer; + current->page = NULL; +} + +/* + * spgAddNode action: add a node to the inner tuple at current + */ +static void +spgAddNodeAction(Relation index, SpGistState 
*state, + SpGistInnerTuple innerTuple, + SPPageDesc *current, SPPageDesc *parent, + int nodeN, Datum nodeLabel) +{ + SpGistInnerTuple newInnerTuple; + XLogRecData rdata[5]; + spgxlogAddNode xlrec; + + /* Construct new inner tuple with additional node */ + newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN); + + /* Prepare WAL record */ + xlrec.node = index->rd_node; + STORE_STATE(state, xlrec.stateSrc); + xlrec.blkno = current->blkno; + xlrec.offnum = current->offnum; + + /* we don't fill these unless we need to change the parent downlink */ + xlrec.blknoParent = InvalidBlockNumber; + xlrec.offnumParent = InvalidOffsetNumber; + xlrec.nodeI = 0; + + /* we don't fill these unless tuple has to be moved */ + xlrec.blknoNew = InvalidBlockNumber; + xlrec.offnumNew = InvalidOffsetNumber; + xlrec.newPage = false; + + ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0); + /* we assume sizeof(xlrec) is at least int-aligned */ + ACCEPT_RDATA_DATA(newInnerTuple, newInnerTuple->size, 1); + ACCEPT_RDATA_BUFFER(current->buffer, 2); + + if (PageGetExactFreeSpace(current->page) >= + newInnerTuple->size - innerTuple->size) + { + /* + * We can replace the inner tuple by new version in-place + */ + START_CRIT_SECTION(); + + PageIndexTupleDelete(current->page, current->offnum); + if (PageAddItem(current->page, + (Item) newInnerTuple, newInnerTuple->size, + current->offnum, false, false) != current->offnum) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + newInnerTuple->size); + + MarkBufferDirty(current->buffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata); + + PageSetLSN(current->page, recptr); + PageSetTLI(current->page, ThisTimeLineID); + } + + END_CRIT_SECTION(); + } + else + { + /* + * move inner tuple to another page, and update parent + */ + SpGistDeadTuple dt; + SPPageDesc saveCurrent; + + /* + * It should not be possible to get here for the root page, since we + * allow only one inner tuple on the root page, and spgFormInnerTuple + * always checks that inner tuples don't exceed the size of a page. + */ + if (current->blkno == SPGIST_HEAD_BLKNO) + elog(ERROR, "cannot enlarge root tuple any more"); + Assert(parent->buffer != InvalidBuffer); + + saveCurrent = *current; + + xlrec.blknoParent = parent->blkno; + xlrec.offnumParent = parent->offnum; + xlrec.nodeI = parent->node; + + /* + * obtain new buffer with the same parity as current, since it will + * be a child of same parent tuple + */ + current->buffer = SpGistGetBuffer(index, + GBUF_INNER_PARITY(current->blkno), + newInnerTuple->size + sizeof(ItemIdData), + &xlrec.newPage); + current->blkno = BufferGetBlockNumber(current->buffer); + current->page = BufferGetPage(current->buffer); + + xlrec.blknoNew = current->blkno; + + /* + * Let's just make real sure new current isn't same as old. Right + * now that's impossible, but if SpGistGetBuffer ever got smart enough + * to delete placeholder tuples before checking space, maybe it + * wouldn't be impossible. The case would appear to work except that + * WAL replay would be subtly wrong, so I think a mere assert isn't + * enough here. + */ + if (xlrec.blknoNew == xlrec.blkno) + elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer"); + + /* + * New current and parent buffer will both be modified; but note that + * parent buffer could be same as either new or old current. 
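+	 * (The old "current" buffer was already attached to the WAL record as rdata[2] above, before we repoint current, so only the new current page and, if distinct, the parent page need to be registered here.)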
+ */ + ACCEPT_RDATA_BUFFER(current->buffer, 3); + if (parent->buffer != current->buffer && + parent->buffer != saveCurrent.buffer) + ACCEPT_RDATA_BUFFER(parent->buffer, 4); + + START_CRIT_SECTION(); + + /* insert new ... */ + xlrec.offnumNew = current->offnum = + SpGistPageAddNewItem(state, current->page, + (Item) newInnerTuple, newInnerTuple->size, + NULL, false); + + MarkBufferDirty(current->buffer); + + /* update parent's downlink and mark parent page dirty */ + saveNodeLink(index, parent, current->blkno, current->offnum); + + /* + * Replace old tuple with a placeholder or redirection tuple. Unless + * doing an index build, we have to insert a redirection tuple for + * possible concurrent scans. We can't just delete it in any case, + * because that could change the offsets of other tuples on the page, + * breaking downlinks from their parents. + */ + if (state->isBuild) + dt = spgFormDeadTuple(state, SPGIST_PLACEHOLDER, + InvalidBlockNumber, InvalidOffsetNumber); + else + dt = spgFormDeadTuple(state, SPGIST_REDIRECT, + current->blkno, current->offnum); + + PageIndexTupleDelete(saveCurrent.page, saveCurrent.offnum); + if (PageAddItem(saveCurrent.page, (Item) dt, dt->size, + saveCurrent.offnum, + false, false) != saveCurrent.offnum) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + dt->size); + + if (state->isBuild) + SpGistPageGetOpaque(saveCurrent.page)->nPlaceholder++; + else + SpGistPageGetOpaque(saveCurrent.page)->nRedirection++; + + MarkBufferDirty(saveCurrent.buffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata); + + /* we don't bother to check if any of these are redundant */ + PageSetLSN(current->page, recptr); + PageSetTLI(current->page, ThisTimeLineID); + PageSetLSN(parent->page, recptr); + PageSetTLI(parent->page, ThisTimeLineID); + PageSetLSN(saveCurrent.page, recptr); + PageSetTLI(saveCurrent.page, ThisTimeLineID); + } + + END_CRIT_SECTION(); + + /* Release saveCurrent if it's not same as current or parent */ + if (saveCurrent.buffer != current->buffer && + saveCurrent.buffer != parent->buffer) + { + SpGistSetLastUsedPage(index, saveCurrent.buffer); + UnlockReleaseBuffer(saveCurrent.buffer); + } + } +} + +/* + * spgSplitNode action: split inner tuple at current into prefix and postfix + */ +static void +spgSplitNodeAction(Relation index, SpGistState *state, + SpGistInnerTuple innerTuple, + SPPageDesc *current, spgChooseOut *out) +{ + SpGistInnerTuple prefixTuple, + postfixTuple; + SpGistNodeTuple node, + *nodes; + BlockNumber postfixBlkno; + OffsetNumber postfixOffset; + int i; + XLogRecData rdata[5]; + spgxlogSplitTuple xlrec; + Buffer newBuffer = InvalidBuffer; + + /* + * Construct new prefix tuple, containing a single node with the + * specified label. (We'll update the node's downlink to point to the + * new postfix tuple, below.) + */ + node = spgFormNodeTuple(state, out->result.splitTuple.nodeLabel, false); + + prefixTuple = spgFormInnerTuple(state, + out->result.splitTuple.prefixHasPrefix, + out->result.splitTuple.prefixPrefixDatum, + 1, &node); + + /* it must fit in the space that innerTuple now occupies */ + if (prefixTuple->size > innerTuple->size) + elog(ERROR, "SPGiST inner-tuple split must not produce longer prefix"); + + /* + * Construct new postfix tuple, containing all nodes of innerTuple with + * same node datums, but with the prefix specified by the picksplit + * function. 
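+	 * (SGITITERATE below merely collects pointers to innerTuple's existing node tuples; spgFormInnerTuple then copies those nodes into the newly built postfix tuple.)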
+ */ + nodes = palloc(sizeof(SpGistNodeTuple) * innerTuple->nNodes); + SGITITERATE(innerTuple, i, node) + { + nodes[i] = node; + } + + postfixTuple = spgFormInnerTuple(state, + out->result.splitTuple.postfixHasPrefix, + out->result.splitTuple.postfixPrefixDatum, + innerTuple->nNodes, nodes); + + /* Postfix tuple is allTheSame if original tuple was */ + postfixTuple->allTheSame = innerTuple->allTheSame; + + /* prep data for WAL record */ + xlrec.node = index->rd_node; + xlrec.newPage = false; + + ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0); + /* we assume sizeof(xlrec) is at least int-aligned */ + ACCEPT_RDATA_DATA(prefixTuple, prefixTuple->size, 1); + ACCEPT_RDATA_DATA(postfixTuple, postfixTuple->size, 2); + ACCEPT_RDATA_BUFFER(current->buffer, 3); + + /* + * If we can't fit both tuples on the current page, get a new page for the + * postfix tuple. In particular, can't split to the root page. + * + * For the space calculation, note that prefixTuple replaces innerTuple + * but postfixTuple will be a new entry. + */ + if (current->blkno == SPGIST_HEAD_BLKNO || + SpGistPageGetFreeSpace(current->page, 1) + innerTuple->size < + prefixTuple->size + postfixTuple->size + sizeof(ItemIdData)) + { + /* + * Choose page with next triple parity, because postfix tuple is a + * child of prefix one + */ + newBuffer = SpGistGetBuffer(index, + GBUF_INNER_PARITY(current->blkno + 1), + postfixTuple->size + sizeof(ItemIdData), + &xlrec.newPage); + ACCEPT_RDATA_BUFFER(newBuffer, 4); + } + + START_CRIT_SECTION(); + + /* + * Replace old tuple by prefix tuple + */ + PageIndexTupleDelete(current->page, current->offnum); + xlrec.offnumPrefix = PageAddItem(current->page, + (Item) prefixTuple, prefixTuple->size, + current->offnum, false, false); + if (xlrec.offnumPrefix != current->offnum) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + prefixTuple->size); + xlrec.blknoPrefix = current->blkno; + + /* + * put postfix tuple into appropriate page + */ + if (newBuffer == InvalidBuffer) + { + xlrec.blknoPostfix = postfixBlkno = current->blkno; + xlrec.offnumPostfix = postfixOffset = + SpGistPageAddNewItem(state, current->page, + (Item) postfixTuple, postfixTuple->size, + NULL, false); + } + else + { + xlrec.blknoPostfix = postfixBlkno = BufferGetBlockNumber(newBuffer); + xlrec.offnumPostfix = postfixOffset = + SpGistPageAddNewItem(state, BufferGetPage(newBuffer), + (Item) postfixTuple, postfixTuple->size, + NULL, false); + MarkBufferDirty(newBuffer); + } + + /* + * And set downlink pointer in the prefix tuple to point to postfix tuple. + * (We can't avoid this step by doing the above two steps in opposite + * order, because there might not be enough space on the page to insert + * the postfix tuple first.) We have to update the local copy of the + * prefixTuple too, because that's what will be written to WAL. 
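+	 * (Concretely: update the palloc'd prefixTuple first, then re-fetch the copy that now resides on the page and set its downlink as well, as done just below.)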
+ */ + updateNodeLink(prefixTuple, 0, postfixBlkno, postfixOffset); + prefixTuple = (SpGistInnerTuple) PageGetItem(current->page, + PageGetItemId(current->page, current->offnum)); + updateNodeLink(prefixTuple, 0, postfixBlkno, postfixOffset); + + MarkBufferDirty(current->buffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, rdata); + + PageSetLSN(current->page, recptr); + PageSetTLI(current->page, ThisTimeLineID); + + if (newBuffer != InvalidBuffer) + { + PageSetLSN(BufferGetPage(newBuffer), recptr); + PageSetTLI(BufferGetPage(newBuffer), ThisTimeLineID); + } + } + + END_CRIT_SECTION(); + + /* Update local free-space cache and release buffer */ + if (newBuffer != InvalidBuffer) + { + SpGistSetLastUsedPage(index, newBuffer); + UnlockReleaseBuffer(newBuffer); + } +} + +/* + * Insert one item into the index + */ +void +spgdoinsert(Relation index, SpGistState *state, + ItemPointer heapPtr, Datum datum) +{ + int level = 0; + Datum leafDatum; + int leafSize; + SPPageDesc current, + parent; + + /* + * Since we don't use index_form_tuple in this AM, we have to make sure + * value to be inserted is not toasted; FormIndexDatum doesn't guarantee + * that. + */ + if (state->attType.attlen == -1) + datum = PointerGetDatum(PG_DETOAST_DATUM(datum)); + + leafDatum = datum; + + /* + * Compute space needed for a leaf tuple containing the given datum. + * + * If it isn't gonna fit, and the opclass can't reduce the datum size by + * suffixing, bail out now rather than getting into an endless loop. + */ + leafSize = SGLTHDRSZ + sizeof(ItemIdData) + + SpGistGetTypeSize(&state->attType, leafDatum); + + if (leafSize > SPGIST_PAGE_CAPACITY && !state->config.longValuesOK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", + (unsigned long) (leafSize - sizeof(ItemIdData)), + (unsigned long) (SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)), + RelationGetRelationName(index)), + errhint("Values larger than a buffer page cannot be indexed."))); + + /* Initialize "current" to the root page */ + current.blkno = SPGIST_HEAD_BLKNO; + current.buffer = InvalidBuffer; + current.page = NULL; + current.offnum = FirstOffsetNumber; + current.node = -1; + + /* "parent" is invalid for the moment */ + parent.blkno = InvalidBlockNumber; + parent.buffer = InvalidBuffer; + parent.page = NULL; + parent.offnum = InvalidOffsetNumber; + parent.node = -1; + + for (;;) + { + bool isNew = false; + + /* + * Bail out if query cancel is pending. We must have this somewhere + * in the loop since a broken opclass could produce an infinite + * picksplit loop. + */ + CHECK_FOR_INTERRUPTS(); + + if (current.blkno == InvalidBlockNumber) + { + /* + * Create a leaf page. If leafSize is too large to fit on a page, + * we won't actually use the page yet, but it simplifies the API + * for doPickSplit to always have a leaf page at hand; so just + * quietly limit our request to a page size. 
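+	 * (leafSize can still exceed SPGIST_PAGE_CAPACITY here only if the opclass sets longValuesOK, since the oversize-without-longValuesOK case was rejected before entering the loop; in that case the choose function is expected to keep shortening leafDatum until it fits.)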
+ */ + current.buffer = SpGistGetBuffer(index, GBUF_LEAF, + Min(leafSize, + SPGIST_PAGE_CAPACITY), + &isNew); + current.blkno = BufferGetBlockNumber(current.buffer); + } + else if (parent.buffer == InvalidBuffer || + current.blkno != parent.blkno) + { + current.buffer = ReadBuffer(index, current.blkno); + LockBuffer(current.buffer, BUFFER_LOCK_EXCLUSIVE); + } + else + { + /* inner tuple can be stored on the same page as parent one */ + current.buffer = parent.buffer; + } + current.page = BufferGetPage(current.buffer); + + if (SpGistPageIsLeaf(current.page)) + { + SpGistLeafTuple leafTuple; + int nToSplit, + sizeToSplit; + + leafTuple = spgFormLeafTuple(state, heapPtr, leafDatum); + if (leafTuple->size + sizeof(ItemIdData) <= + SpGistPageGetFreeSpace(current.page, 1)) + { + /* it fits on page, so insert it and we're done */ + addLeafTuple(index, state, leafTuple, + ¤t, &parent, isNew); + break; + } + else if ((sizeToSplit = + checkSplitConditions(index, state, ¤t, + &nToSplit)) < SPGIST_PAGE_CAPACITY / 2 && + nToSplit < 64 && + leafTuple->size + sizeof(ItemIdData) + sizeToSplit <= SPGIST_PAGE_CAPACITY) + { + /* + * the amount of data is pretty small, so just move the whole + * chain to another leaf page rather than splitting it. + */ + Assert(!isNew); + moveLeafs(index, state, ¤t, &parent, leafTuple); + break; /* we're done */ + } + else + { + /* picksplit */ + if (doPickSplit(index, state, ¤t, &parent, + leafTuple, level, isNew)) + break; /* doPickSplit installed new tuples */ + + /* leaf tuple will not be inserted yet */ + pfree(leafTuple); + + /* + * current now describes new inner tuple, go insert into it + */ + Assert(!SpGistPageIsLeaf(current.page)); + goto process_inner_tuple; + } + } + else /* non-leaf page */ + { + /* + * Apply the opclass choose function to figure out how to insert + * the given datum into the current inner tuple. + */ + SpGistInnerTuple innerTuple; + spgChooseIn in; + spgChooseOut out; + + /* + * spgAddNode and spgSplitTuple cases will loop back to here to + * complete the insertion operation. Just in case the choose + * function is broken and produces add or split requests + * repeatedly, check for query cancel. + */ + process_inner_tuple: + CHECK_FOR_INTERRUPTS(); + + innerTuple = (SpGistInnerTuple) PageGetItem(current.page, + PageGetItemId(current.page, current.offnum)); + + in.datum = datum; + in.leafDatum = leafDatum; + in.level = level; + in.allTheSame = innerTuple->allTheSame; + in.hasPrefix = (innerTuple->prefixSize > 0); + in.prefixDatum = SGITDATUM(innerTuple, state); + in.nNodes = innerTuple->nNodes; + in.nodeLabels = spgExtractNodeLabels(state, innerTuple); + + memset(&out, 0, sizeof(out)); + + FunctionCall2Coll(&state->chooseFn, + index->rd_indcollation[0], + PointerGetDatum(&in), + PointerGetDatum(&out)); + + if (innerTuple->allTheSame) + { + /* + * It's not allowed to do an AddNode at an allTheSame tuple. + * Opclass must say "match", in which case we choose a random + * one of the nodes to descend into, or "split". 
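+	 * (All nodes of an allTheSame tuple are equivalent by construction, so any of them is an acceptable target; choosing one at random spreads insertions evenly across the equivalent downlinks.)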
+ */ + if (out.resultType == spgAddNode) + elog(ERROR, "cannot add a node to an allTheSame inner tuple"); + else if (out.resultType == spgMatchNode) + out.result.matchNode.nodeN = random() % innerTuple->nNodes; + } + + switch (out.resultType) + { + case spgMatchNode: + /* Descend to N'th child node */ + spgMatchNodeAction(index, state, innerTuple, + ¤t, &parent, + out.result.matchNode.nodeN); + /* Adjust level as per opclass request */ + level += out.result.matchNode.levelAdd; + /* Replace leafDatum and recompute leafSize */ + leafDatum = out.result.matchNode.restDatum; + leafSize = SGLTHDRSZ + sizeof(ItemIdData) + + SpGistGetTypeSize(&state->attType, leafDatum); + + /* + * Loop around and attempt to insert the new leafDatum + * at "current" (which might reference an existing child + * tuple, or might be invalid to force us to find a new + * page for the tuple). + * + * Note: if the opclass sets longValuesOK, we rely on the + * choose function to eventually shorten the leafDatum + * enough to fit on a page. We could add a test here to + * complain if the datum doesn't get visibly shorter each + * time, but that could get in the way of opclasses that + * "simplify" datums in a way that doesn't necessarily + * lead to physical shortening on every cycle. + */ + break; + case spgAddNode: + /* AddNode is not sensible if nodes don't have labels */ + if (in.nodeLabels == NULL) + elog(ERROR, "cannot add a node to an inner tuple without node labels"); + /* Add node to inner tuple, per request */ + spgAddNodeAction(index, state, innerTuple, + ¤t, &parent, + out.result.addNode.nodeN, + out.result.addNode.nodeLabel); + + /* + * Retry insertion into the enlarged node. We assume + * that we'll get a MatchNode result this time. + */ + goto process_inner_tuple; + break; + case spgSplitTuple: + /* Split inner tuple, per request */ + spgSplitNodeAction(index, state, innerTuple, + ¤t, &out); + + /* Retry insertion into the split node */ + goto process_inner_tuple; + break; + default: + elog(ERROR, "unrecognized SPGiST choose result: %d", + (int) out.resultType); + break; + } + } + } /* end loop */ + + /* + * Release any buffers we're still holding. Beware of possibility that + * current and parent reference same buffer. + */ + if (current.buffer != InvalidBuffer) + { + SpGistSetLastUsedPage(index, current.buffer); + UnlockReleaseBuffer(current.buffer); + } + if (parent.buffer != InvalidBuffer && + parent.buffer != current.buffer) + { + SpGistSetLastUsedPage(index, parent.buffer); + UnlockReleaseBuffer(parent.buffer); + } +} diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c new file mode 100644 index 00000000000..4a059bdfedc --- /dev/null +++ b/src/backend/access/spgist/spginsert.c @@ -0,0 +1,219 @@ +/*------------------------------------------------------------------------- + * + * spginsert.c + * Externally visible index creation/insertion routines + * + * All the actual insertion logic is in spgdoinsert.c. 
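+ * This file provides the spgbuild(), spgbuildempty(), and spginsert() entry points.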
+ * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spginsert.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/spgist_private.h" +#include "catalog/index.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/smgr.h" +#include "utils/memutils.h" + + +typedef struct +{ + SpGistState spgstate; /* SPGiST's working state */ + MemoryContext tmpCtx; /* per-tuple temporary context */ +} SpGistBuildState; + + +/* Callback to process one heap tuple during IndexBuildHeapScan */ +static void +spgistBuildCallback(Relation index, HeapTuple htup, Datum *values, + bool *isnull, bool tupleIsAlive, void *state) +{ + SpGistBuildState *buildstate = (SpGistBuildState *) state; + + /* SPGiST doesn't index nulls */ + if (*isnull == false) + { + /* Work in temp context, and reset it after each tuple */ + MemoryContext oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + spgdoinsert(index, &buildstate->spgstate, &htup->t_self, *values); + + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(buildstate->tmpCtx); + } +} + +/* + * Build an SP-GiST index. + */ +Datum +spgbuild(PG_FUNCTION_ARGS) +{ + Relation heap = (Relation) PG_GETARG_POINTER(0); + Relation index = (Relation) PG_GETARG_POINTER(1); + IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); + IndexBuildResult *result; + double reltuples; + SpGistBuildState buildstate; + Buffer metabuffer, + rootbuffer; + + if (RelationGetNumberOfBlocks(index) != 0) + elog(ERROR, "index \"%s\" already contains data", + RelationGetRelationName(index)); + + /* + * Initialize the meta page and root page + */ + metabuffer = SpGistNewBuffer(index); + rootbuffer = SpGistNewBuffer(index); + + Assert(BufferGetBlockNumber(metabuffer) == SPGIST_METAPAGE_BLKNO); + Assert(BufferGetBlockNumber(rootbuffer) == SPGIST_HEAD_BLKNO); + + START_CRIT_SECTION(); + + SpGistInitMetapage(BufferGetPage(metabuffer)); + MarkBufferDirty(metabuffer); + SpGistInitBuffer(rootbuffer, SPGIST_LEAF); + MarkBufferDirty(rootbuffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + XLogRecData rdata; + + /* WAL data is just the relfilenode */ + rdata.data = (char *) &(index->rd_node); + rdata.len = sizeof(RelFileNode); + rdata.buffer = InvalidBuffer; + rdata.next = NULL; + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata); + + PageSetLSN(BufferGetPage(metabuffer), recptr); + PageSetTLI(BufferGetPage(metabuffer), ThisTimeLineID); + PageSetLSN(BufferGetPage(rootbuffer), recptr); + PageSetTLI(BufferGetPage(rootbuffer), ThisTimeLineID); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(metabuffer); + UnlockReleaseBuffer(rootbuffer); + + /* + * Now insert all the heap data into the index + */ + initSpGistState(&buildstate.spgstate, index); + buildstate.spgstate.isBuild = true; + + buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "SP-GiST build temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, + spgistBuildCallback, (void *) &buildstate); + + MemoryContextDelete(buildstate.tmpCtx); + + SpGistUpdateMetaPage(index); + + result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult)); + result->heap_tuples = result->index_tuples = reltuples; + + 
PG_RETURN_POINTER(result); +} + +/* + * Build an empty SPGiST index in the initialization fork + */ +Datum +spgbuildempty(PG_FUNCTION_ARGS) +{ + Relation index = (Relation) PG_GETARG_POINTER(0); + Page page; + + /* Construct metapage. */ + page = (Page) palloc(BLCKSZ); + SpGistInitMetapage(page); + + /* Write the page. If archiving/streaming, XLOG it. */ + smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO, + (char *) page, true); + if (XLogIsNeeded()) + log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, + SPGIST_METAPAGE_BLKNO, page); + + /* Likewise for the root page. */ + SpGistInitPage(page, SPGIST_LEAF); + + smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_HEAD_BLKNO, + (char *) page, true); + if (XLogIsNeeded()) + log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, + SPGIST_HEAD_BLKNO, page); + + /* + * An immediate sync is required even if we xlog'd the pages, because the + * writes did not go through shared buffers and therefore a concurrent + * checkpoint may have moved the redo pointer past our xlog record. + */ + smgrimmedsync(index->rd_smgr, INIT_FORKNUM); + + PG_RETURN_VOID(); +} + +/* + * Insert one new tuple into an SPGiST index. + */ +Datum +spginsert(PG_FUNCTION_ARGS) +{ + Relation index = (Relation) PG_GETARG_POINTER(0); + Datum *values = (Datum *) PG_GETARG_POINTER(1); + bool *isnull = (bool *) PG_GETARG_POINTER(2); + ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); + +#ifdef NOT_USED + Relation heapRel = (Relation) PG_GETARG_POINTER(4); + IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5); +#endif + SpGistState spgstate; + MemoryContext oldCtx; + MemoryContext insertCtx; + + /* SPGiST doesn't index nulls */ + if (*isnull) + PG_RETURN_BOOL(false); + + insertCtx = AllocSetContextCreate(CurrentMemoryContext, + "SP-GiST insert temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldCtx = MemoryContextSwitchTo(insertCtx); + + initSpGistState(&spgstate, index); + + spgdoinsert(index, &spgstate, ht_ctid, *values); + + SpGistUpdateMetaPage(index); + + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + + /* return false since we've not done any unique check */ + PG_RETURN_BOOL(false); +} diff --git a/src/backend/access/spgist/spgkdtreeproc.c b/src/backend/access/spgist/spgkdtreeproc.c new file mode 100644 index 00000000000..e11d1a35e3a --- /dev/null +++ b/src/backend/access/spgist/spgkdtreeproc.c @@ -0,0 +1,298 @@ +/*------------------------------------------------------------------------- + * + * spgkdtreeproc.c + * implementation of k-d tree over points for SP-GiST + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgkdtreeproc.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gist.h" /* for RTree strategy numbers */ +#include "access/spgist.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/geo_decls.h" + + +Datum +spg_kd_config(PG_FUNCTION_ARGS) +{ + /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ + spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); + + cfg->prefixType = FLOAT8OID; + cfg->labelType = VOIDOID; /* we don't need node labels */ + cfg->longValuesOK = false; + PG_RETURN_VOID(); +} + +static int +getSide(double coord, bool isX, Point *tst) +{ + double 
tstcoord = (isX) ? tst->x : tst->y; + + if (coord == tstcoord) + return 0; + else if (coord > tstcoord) + return 1; + else + return -1; +} + +Datum +spg_kd_choose(PG_FUNCTION_ARGS) +{ + spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); + spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); + Point *inPoint = DatumGetPointP(in->datum); + double coord; + + if (in->allTheSame) + elog(ERROR, "allTheSame should not occur for k-d trees"); + + Assert(in->hasPrefix); + coord = DatumGetFloat8(in->prefixDatum); + + Assert(in->nNodes == 2); + + out->resultType = spgMatchNode; + out->result.matchNode.nodeN = + (getSide(coord, in->level % 2, inPoint) > 0) ? 0 : 1; + out->result.matchNode.levelAdd = 1; + out->result.matchNode.restDatum = PointPGetDatum(inPoint); + + PG_RETURN_VOID(); +} + +typedef struct SortedPoint +{ + Point *p; + int i; +} SortedPoint; + +static int +x_cmp(const void *a, const void *b) +{ + SortedPoint *pa = (SortedPoint *) a; + SortedPoint *pb = (SortedPoint *) b; + + if (pa->p->x == pb->p->x) + return 0; + return (pa->p->x > pb->p->x) ? 1 : -1; +} + +static int +y_cmp(const void *a, const void *b) +{ + SortedPoint *pa = (SortedPoint *) a; + SortedPoint *pb = (SortedPoint *) b; + + if (pa->p->y == pb->p->y) + return 0; + return (pa->p->y > pb->p->y) ? 1 : -1; +} + + +Datum +spg_kd_picksplit(PG_FUNCTION_ARGS) +{ + spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); + spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); + int i; + int middle; + SortedPoint *sorted; + double coord; + + sorted = palloc(sizeof(*sorted) * in->nTuples); + for (i = 0; i < in->nTuples; i++) + { + sorted[i].p = DatumGetPointP(in->datums[i]); + sorted[i].i = i; + } + + qsort(sorted, in->nTuples, sizeof(*sorted), + (in->level % 2) ? x_cmp : y_cmp); + middle = in->nTuples >> 1; + coord = (in->level % 2) ? sorted[middle].p->x : sorted[middle].p->y; + + out->hasPrefix = true; + out->prefixDatum = Float8GetDatum(coord); + + out->nNodes = 2; + out->nodeLabels = NULL; /* we don't need node labels */ + + out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); + out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); + + /* + * Note: points that have coordinates exactly equal to coord may get + * classified into either node, depending on where they happen to fall + * in the sorted list. This is okay as long as the inner_consistent + * function descends into both sides for such cases. This is better + * than the alternative of trying to have an exact boundary, because + * it keeps the tree balanced even when we have many instances of the + * same point value. So we should never trigger the allTheSame logic. + */ + for (i = 0; i < in->nTuples; i++) + { + Point *p = sorted[i].p; + int n = sorted[i].i; + + out->mapTuplesToNodes[n] = (i < middle) ? 
0 : 1; + out->leafTupleDatums[n] = PointPGetDatum(p); + } + + PG_RETURN_VOID(); +} + +Datum +spg_kd_inner_consistent(PG_FUNCTION_ARGS) +{ + spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); + spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); + Point *query; + BOX *boxQuery; + double coord; + + query = DatumGetPointP(in->query); + Assert(in->hasPrefix); + coord = DatumGetFloat8(in->prefixDatum); + + if (in->allTheSame) + elog(ERROR, "allTheSame should not occur for k-d trees"); + + Assert(in->nNodes == 2); + out->nodeNumbers = (int *) palloc(sizeof(int) * 2); + out->levelAdds = (int *) palloc(sizeof(int) * 2); + out->levelAdds[0] = 1; + out->levelAdds[1] = 1; + out->nNodes = 0; + + switch (in->strategy) + { + case RTLeftStrategyNumber: + out->nNodes = 1; + out->nodeNumbers[0] = 0; + + if ((in->level % 2) == 0 || FPge(query->x, coord)) + { + out->nodeNumbers[1] = 1; + out->nNodes++; + } + break; + case RTRightStrategyNumber: + out->nNodes = 1; + out->nodeNumbers[0] = 1; + + if ((in->level % 2) == 0 || FPle(query->x, coord)) + { + out->nodeNumbers[1] = 0; + out->nNodes++; + } + break; + case RTSameStrategyNumber: + if (in->level % 2) + { + if (FPle(query->x, coord)) + { + out->nodeNumbers[out->nNodes] = 0; + out->nNodes++; + } + if (FPge(query->x, coord)) + { + out->nodeNumbers[out->nNodes] = 1; + out->nNodes++; + } + } + else + { + if (FPle(query->y, coord)) + { + out->nodeNumbers[out->nNodes] = 0; + out->nNodes++; + } + if (FPge(query->y, coord)) + { + out->nodeNumbers[out->nNodes] = 1; + out->nNodes++; + } + } + break; + case RTBelowStrategyNumber: + out->nNodes = 1; + out->nodeNumbers[0] = 0; + + if ((in->level % 2) == 1 || FPge(query->y, coord)) + { + out->nodeNumbers[1] = 1; + out->nNodes++; + } + break; + case RTAboveStrategyNumber: + out->nNodes = 1; + out->nodeNumbers[0] = 1; + + if ((in->level % 2) == 1 || FPle(query->y, coord)) + { + out->nodeNumbers[1] = 0; + out->nNodes++; + } + break; + case RTContainedByStrategyNumber: + + /* + * For this operator, the query is a box not a point. We cheat to + * the extent of assuming that DatumGetPointP won't do anything + * that would be bad for a pointer-to-box. + */ + boxQuery = DatumGetBoxP(in->query); + + out->nNodes = 1; + if (in->level % 2) + { + if (FPlt(boxQuery->high.x, coord)) + out->nodeNumbers[0] = 0; + else if (FPgt(boxQuery->low.x, coord)) + out->nodeNumbers[0] = 1; + else + { + out->nodeNumbers[0] = 0; + out->nodeNumbers[1] = 1; + out->nNodes = 2; + } + } + else + { + if (FPlt(boxQuery->high.y, coord)) + out->nodeNumbers[0] = 0; + else if (FPgt(boxQuery->low.y, coord)) + out->nodeNumbers[0] = 1; + else + { + out->nodeNumbers[0] = 0; + out->nodeNumbers[1] = 1; + out->nNodes = 2; + } + } + break; + default: + elog(ERROR, "unrecognized strategy number: %d", in->strategy); + break; + } + + PG_RETURN_VOID(); +} + +/* + * spg_kd_leaf_consistent() is the same as spg_quad_leaf_consistent(), + * since we support the same operators and the same leaf data type. + * So we just borrow that function. 
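+ * (That is, the k-d tree operator class simply lists spg_quad_leaf_consistent as its leaf_consistent support procedure.)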
+ */ diff --git a/src/backend/access/spgist/spgquadtreeproc.c b/src/backend/access/spgist/spgquadtreeproc.c new file mode 100644 index 00000000000..0be6e55ad30 --- /dev/null +++ b/src/backend/access/spgist/spgquadtreeproc.c @@ -0,0 +1,360 @@ +/*------------------------------------------------------------------------- + * + * spgquadtreeproc.c + * implementation of quad tree over points for SP-GiST + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgquadtreeproc.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gist.h" /* for RTree strategy numbers */ +#include "access/spgist.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/geo_decls.h" + + +Datum +spg_quad_config(PG_FUNCTION_ARGS) +{ + /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ + spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); + + cfg->prefixType = POINTOID; + cfg->labelType = VOIDOID; /* we don't need node labels */ + cfg->longValuesOK = false; + PG_RETURN_VOID(); +} + +#define SPTEST(f, x, y) \ + DatumGetBool(DirectFunctionCall2(f, PointPGetDatum(x), PointPGetDatum(y))) + +/* + * Determine which quadrant a point falls into, relative to the centroid. + * + * Quadrants are identified like this: + * + * 4 | 1 + * ----+----- + * 3 | 2 + * + * Points on one of the axes are taken to lie in the lowest-numbered + * adjacent quadrant. + */ +static int2 +getQuadrant(Point *centroid, Point *tst) +{ + if ((SPTEST(point_above, tst, centroid) || + SPTEST(point_horiz, tst, centroid)) && + (SPTEST(point_right, tst, centroid) || + SPTEST(point_vert, tst, centroid))) + return 1; + + if (SPTEST(point_below, tst, centroid) && + (SPTEST(point_right, tst, centroid) || + SPTEST(point_vert, tst, centroid))) + return 2; + + if ((SPTEST(point_below, tst, centroid) || + SPTEST(point_horiz, tst, centroid)) && + SPTEST(point_left, tst, centroid)) + return 3; + + if (SPTEST(point_above, tst, centroid) && + SPTEST(point_left, tst, centroid)) + return 4; + + elog(ERROR, "getQuadrant: impossible case"); + return 0; +} + + +Datum +spg_quad_choose(PG_FUNCTION_ARGS) +{ + spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); + spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); + Point *inPoint = DatumGetPointP(in->datum), + *centroid; + + if (in->allTheSame) + { + out->resultType = spgMatchNode; + /* nodeN will be set by core */ + out->result.matchNode.levelAdd = 0; + out->result.matchNode.restDatum = PointPGetDatum(inPoint); + PG_RETURN_VOID(); + } + + Assert(in->hasPrefix); + centroid = DatumGetPointP(in->prefixDatum); + + Assert(in->nNodes == 4); + + out->resultType = spgMatchNode; + out->result.matchNode.nodeN = getQuadrant(centroid, inPoint) - 1; + out->result.matchNode.levelAdd = 0; + out->result.matchNode.restDatum = PointPGetDatum(inPoint); + + PG_RETURN_VOID(); +} + +#ifdef USE_MEDIAN +static int +x_cmp(const void *a, const void *b, void *arg) +{ + Point *pa = *(Point **) a; + Point *pb = *(Point **) b; + + if (pa->x == pb->x) + return 0; + return (pa->x > pb->x) ? 1 : -1; +} + +static int +y_cmp(const void *a, const void *b, void *arg) +{ + Point *pa = *(Point **) a; + Point *pb = *(Point **) b; + + if (pa->y == pb->y) + return 0; + return (pa->y > pb->y) ? 
1 : -1; +} +#endif + +Datum +spg_quad_picksplit(PG_FUNCTION_ARGS) +{ + spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); + spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); + int i; + Point *centroid; + +#ifdef USE_MEDIAN + /* Use the median values of x and y as the centroid point */ + Point **sorted; + + sorted = palloc(sizeof(*sorted) * in->nTuples); + for (i = 0; i < in->nTuples; i++) + sorted[i] = DatumGetPointP(in->datums[i]); + + centroid = palloc(sizeof(*centroid)); + + qsort(sorted, in->nTuples, sizeof(*sorted), x_cmp); + centroid->x = sorted[in->nTuples >> 1]->x; + qsort(sorted, in->nTuples, sizeof(*sorted), y_cmp); + centroid->y = sorted[in->nTuples >> 1]->y; +#else + /* Use the average values of x and y as the centroid point */ + centroid = palloc0(sizeof(*centroid)); + + for (i = 0; i < in->nTuples; i++) + { + centroid->x += DatumGetPointP(in->datums[i])->x; + centroid->y += DatumGetPointP(in->datums[i])->y; + } + + centroid->x /= in->nTuples; + centroid->y /= in->nTuples; +#endif + + out->hasPrefix = true; + out->prefixDatum = PointPGetDatum(centroid); + + out->nNodes = 4; + out->nodeLabels = NULL; /* we don't need node labels */ + + out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); + out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); + + for (i = 0; i < in->nTuples; i++) + { + Point *p = DatumGetPointP(in->datums[i]); + int quadrant = getQuadrant(centroid, p) - 1; + + out->leafTupleDatums[i] = PointPGetDatum(p); + out->mapTuplesToNodes[i] = quadrant; + } + + PG_RETURN_VOID(); +} + + +/* Subroutine to fill out->nodeNumbers[] for spg_quad_inner_consistent */ +static void +setNodes(spgInnerConsistentOut *out, bool isAll, int first, int second) +{ + if (isAll) + { + out->nNodes = 4; + out->nodeNumbers[0] = 0; + out->nodeNumbers[1] = 1; + out->nodeNumbers[2] = 2; + out->nodeNumbers[3] = 3; + } + else + { + out->nNodes = 2; + out->nodeNumbers[0] = first - 1; + out->nodeNumbers[1] = second - 1; + } +} + + +Datum +spg_quad_inner_consistent(PG_FUNCTION_ARGS) +{ + spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); + spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); + Point *query, + *centroid; + BOX *boxQuery; + + query = DatumGetPointP(in->query); + Assert(in->hasPrefix); + centroid = DatumGetPointP(in->prefixDatum); + + if (in->allTheSame) + { + /* Report that all nodes should be visited */ + int i; + + out->nNodes = in->nNodes; + out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); + for (i = 0; i < in->nNodes; i++) + out->nodeNumbers[i] = i; + PG_RETURN_VOID(); + } + + Assert(in->nNodes == 4); + out->nodeNumbers = (int *) palloc(sizeof(int) * 4); + + switch (in->strategy) + { + case RTLeftStrategyNumber: + setNodes(out, SPTEST(point_left, centroid, query), 3, 4); + break; + case RTRightStrategyNumber: + setNodes(out, SPTEST(point_right, centroid, query), 1, 2); + break; + case RTSameStrategyNumber: + out->nNodes = 1; + out->nodeNumbers[0] = getQuadrant(centroid, query) - 1; + break; + case RTBelowStrategyNumber: + setNodes(out, SPTEST(point_below, centroid, query), 2, 3); + break; + case RTAboveStrategyNumber: + setNodes(out, SPTEST(point_above, centroid, query), 1, 4); + break; + case RTContainedByStrategyNumber: + + /* + * For this operator, the query is a box not a point. We cheat to + * the extent of assuming that DatumGetPointP won't do anything + * that would be bad for a pointer-to-box. 
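+	 * (Example: if the whole query box lies strictly above and to the right of the centroid, all four corners fall in quadrant 1, so only node 0 is visited.)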
+ */ + boxQuery = DatumGetBoxP(in->query); + + if (DatumGetBool(DirectFunctionCall2(box_contain_pt, + PointerGetDatum(boxQuery), + PointerGetDatum(centroid)))) + { + /* centroid is in box, so descend to all quadrants */ + setNodes(out, true, 0, 0); + } + else + { + /* identify quadrant(s) containing all corners of box */ + Point p; + int i, + r = 0; + + p = boxQuery->low; + r |= 1 << (getQuadrant(centroid, &p) - 1); + + p.y = boxQuery->high.y; + r |= 1 << (getQuadrant(centroid, &p) - 1); + + p = boxQuery->high; + r |= 1 << (getQuadrant(centroid, &p) - 1); + + p.x = boxQuery->low.x; + r |= 1 << (getQuadrant(centroid, &p) - 1); + + /* we must descend into those quadrant(s) */ + out->nNodes = 0; + for (i = 0; i < 4; i++) + { + if (r & (1 << i)) + { + out->nodeNumbers[out->nNodes] = i; + out->nNodes++; + } + } + } + break; + default: + elog(ERROR, "unrecognized strategy number: %d", in->strategy); + break; + } + + PG_RETURN_VOID(); +} + + +Datum +spg_quad_leaf_consistent(PG_FUNCTION_ARGS) +{ + spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); + spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); + Point *query = DatumGetPointP(in->query); + Point *datum = DatumGetPointP(in->leafDatum); + bool res; + + /* all tests are exact */ + out->recheck = false; + + switch (in->strategy) + { + case RTLeftStrategyNumber: + res = SPTEST(point_left, datum, query); + break; + case RTRightStrategyNumber: + res = SPTEST(point_right, datum, query); + break; + case RTSameStrategyNumber: + res = SPTEST(point_eq, datum, query); + break; + case RTBelowStrategyNumber: + res = SPTEST(point_below, datum, query); + break; + case RTAboveStrategyNumber: + res = SPTEST(point_above, datum, query); + break; + case RTContainedByStrategyNumber: + + /* + * For this operator, the query is a box not a point. We cheat to + * the extent of assuming that DatumGetPointP won't do anything + * that would be bad for a pointer-to-box. 
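+	 * (Note the argument order: box_contain_pt(box, point), which is why the query is passed as the first SPTEST argument here.)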
+ */ + res = SPTEST(box_contain_pt, query, datum); + break; + default: + elog(ERROR, "unrecognized strategy number: %d", in->strategy); + res = false; + break; + } + + PG_RETURN_BOOL(res); +} diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c new file mode 100644 index 00000000000..1c6180b2d24 --- /dev/null +++ b/src/backend/access/spgist/spgscan.c @@ -0,0 +1,543 @@ +/*------------------------------------------------------------------------- + * + * spgscan.c + * routines for scanning SP-GiST indexes + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgscan.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/relscan.h" +#include "access/spgist_private.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "utils/datum.h" +#include "utils/memutils.h" + + +typedef struct ScanStackEntry +{ + Datum reconstructedValue; /* value reconstructed from parent */ + int level; /* level of items on this page */ + ItemPointerData ptr; /* block and offset to scan from */ +} ScanStackEntry; + + +/* Free a ScanStackEntry */ +static void +freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry) +{ + if (!so->state.attType.attbyval && + DatumGetPointer(stackEntry->reconstructedValue) != NULL) + pfree(DatumGetPointer(stackEntry->reconstructedValue)); + pfree(stackEntry); +} + +/* Free the entire stack */ +static void +freeScanStack(SpGistScanOpaque so) +{ + ListCell *lc; + + foreach(lc, so->scanStack) + { + freeScanStackEntry(so, (ScanStackEntry *) lfirst(lc)); + } + list_free(so->scanStack); + so->scanStack = NIL; +} + +/* Initialize scanStack with a single entry for the root page */ +static void +resetSpGistScanOpaque(SpGistScanOpaque so) +{ + ScanStackEntry *startEntry = palloc0(sizeof(ScanStackEntry)); + + ItemPointerSet(&startEntry->ptr, SPGIST_HEAD_BLKNO, FirstOffsetNumber); + + freeScanStack(so); + so->scanStack = list_make1(startEntry); + so->nPtrs = so->iPtr = 0; +} + +Datum +spgbeginscan(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation) PG_GETARG_POINTER(0); + int keysz = PG_GETARG_INT32(1); + /* ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); */ + IndexScanDesc scan; + SpGistScanOpaque so; + + scan = RelationGetIndexScan(rel, keysz, 0); + + so = (SpGistScanOpaque) palloc0(sizeof(SpGistScanOpaqueData)); + initSpGistState(&so->state, scan->indexRelation); + so->tempCxt = AllocSetContextCreate(CurrentMemoryContext, + "SP-GiST search temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + resetSpGistScanOpaque(so); + scan->opaque = so; + + PG_RETURN_POINTER(scan); +} + +Datum +spgrescan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; + ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); + + if (scankey && scan->numberOfKeys > 0) + { + memmove(scan->keyData, scankey, + scan->numberOfKeys * sizeof(ScanKeyData)); + } + + resetSpGistScanOpaque(so); + + PG_RETURN_VOID(); +} + +Datum +spgendscan(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; + + MemoryContextDelete(so->tempCxt); + + PG_RETURN_VOID(); +} + +Datum +spgmarkpos(PG_FUNCTION_ARGS) +{ + elog(ERROR, "SPGiST does not support 
mark/restore"); + PG_RETURN_VOID(); +} + +Datum +spgrestrpos(PG_FUNCTION_ARGS) +{ + elog(ERROR, "SPGiST does not support mark/restore"); + PG_RETURN_VOID(); +} + +/* + * Test whether a leaf datum satisfies all the scan keys + * + * *recheck is set true if any of the operators are lossy + */ +static bool +spgLeafTest(SpGistScanOpaque so, Datum leafDatum, + int level, Datum reconstructedValue, + bool *recheck) +{ + bool result = true; + spgLeafConsistentIn in; + spgLeafConsistentOut out; + MemoryContext oldCtx; + int i; + + *recheck = false; + + /* set up values that are the same for all quals */ + in.reconstructedValue = reconstructedValue; + in.level = level; + in.leafDatum = leafDatum; + + /* Apply each leaf consistent function, working in the temp context */ + oldCtx = MemoryContextSwitchTo(so->tempCxt); + for (i = 0; i < so->numberOfKeys; i++) + { + in.strategy = so->keyData[i].sk_strategy; + in.query = so->keyData[i].sk_argument; + + out.recheck = false; + + result = DatumGetBool(FunctionCall2Coll(&so->state.leafConsistentFn, + so->keyData[i].sk_collation, + PointerGetDatum(&in), + PointerGetDatum(&out))); + *recheck |= out.recheck; + if (!result) + break; + } + MemoryContextSwitchTo(oldCtx); + + return result; +} + +/* + * Walk the tree and report all tuples passing the scan quals to the storeRes + * subroutine. + * + * If scanWholeIndex is true, we'll do just that. If not, we'll stop at the + * next page boundary once we have reported at least one tuple. + */ +static void +spgWalk(Relation index, SpGistScanOpaque so, bool scanWholeIndex, + void (*storeRes) (SpGistScanOpaque, ItemPointer, bool)) +{ + Buffer buffer = InvalidBuffer; + bool reportedSome = false; + + while (scanWholeIndex || !reportedSome) + { + ScanStackEntry *stackEntry; + BlockNumber blkno; + OffsetNumber offset; + Page page; + + /* Pull next to-do item from the list */ + if (so->scanStack == NIL) + break; /* there are no more pages to scan */ + + stackEntry = (ScanStackEntry *) linitial(so->scanStack); + so->scanStack = list_delete_first(so->scanStack); + +redirect: + /* Check for interrupts, just in case of infinite loop */ + CHECK_FOR_INTERRUPTS(); + + blkno = ItemPointerGetBlockNumber(&stackEntry->ptr); + offset = ItemPointerGetOffsetNumber(&stackEntry->ptr); + + if (buffer == InvalidBuffer) + { + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + } + else if (blkno != BufferGetBlockNumber(buffer)) + { + UnlockReleaseBuffer(buffer); + buffer = ReadBuffer(index, blkno); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + } + /* else new pointer points to the same page, no work needed */ + + page = BufferGetPage(buffer); + + if (SpGistPageIsLeaf(page)) + { + SpGistLeafTuple leafTuple; + OffsetNumber max = PageGetMaxOffsetNumber(page); + bool recheck = false; + + if (blkno == SPGIST_HEAD_BLKNO) + { + /* When root is a leaf, examine all its tuples */ + for (offset = FirstOffsetNumber; offset <= max; offset++) + { + leafTuple = (SpGistLeafTuple) + PageGetItem(page, PageGetItemId(page, offset)); + if (leafTuple->tupstate != SPGIST_LIVE) + { + /* all tuples on root should be live */ + elog(ERROR, "unexpected SPGiST tuple state: %d", + leafTuple->tupstate); + } + + Assert(ItemPointerIsValid(&leafTuple->heapPtr)); + if (spgLeafTest(so, + SGLTDATUM(leafTuple, &so->state), + stackEntry->level, + stackEntry->reconstructedValue, + &recheck)) + { + storeRes(so, &leafTuple->heapPtr, recheck); + reportedSome = true; + } + } + } + else + { + /* Normal case: just examine the chain we arrived at */ + while (offset 
!= InvalidOffsetNumber) + { + Assert(offset >= FirstOffsetNumber && offset <= max); + leafTuple = (SpGistLeafTuple) + PageGetItem(page, PageGetItemId(page, offset)); + if (leafTuple->tupstate != SPGIST_LIVE) + { + if (leafTuple->tupstate == SPGIST_REDIRECT) + { + /* redirection tuple should be first in chain */ + Assert(offset == ItemPointerGetOffsetNumber(&stackEntry->ptr)); + /* transfer attention to redirect point */ + stackEntry->ptr = ((SpGistDeadTuple) leafTuple)->pointer; + Assert(ItemPointerGetBlockNumber(&stackEntry->ptr) != SPGIST_METAPAGE_BLKNO); + goto redirect; + } + if (leafTuple->tupstate == SPGIST_DEAD) + { + /* dead tuple should be first in chain */ + Assert(offset == ItemPointerGetOffsetNumber(&stackEntry->ptr)); + /* No live entries on this page */ + Assert(leafTuple->nextOffset == InvalidOffsetNumber); + break; + } + /* We should not arrive at a placeholder */ + elog(ERROR, "unexpected SPGiST tuple state: %d", + leafTuple->tupstate); + } + + Assert(ItemPointerIsValid(&leafTuple->heapPtr)); + if (spgLeafTest(so, + SGLTDATUM(leafTuple, &so->state), + stackEntry->level, + stackEntry->reconstructedValue, + &recheck)) + { + storeRes(so, &leafTuple->heapPtr, recheck); + reportedSome = true; + } + + offset = leafTuple->nextOffset; + } + } + } + else /* page is inner */ + { + SpGistInnerTuple innerTuple; + SpGistNodeTuple node; + int i; + + innerTuple = (SpGistInnerTuple) PageGetItem(page, + PageGetItemId(page, offset)); + + if (innerTuple->tupstate != SPGIST_LIVE) + { + if (innerTuple->tupstate == SPGIST_REDIRECT) + { + /* transfer attention to redirect point */ + stackEntry->ptr = ((SpGistDeadTuple) innerTuple)->pointer; + Assert(ItemPointerGetBlockNumber(&stackEntry->ptr) != SPGIST_METAPAGE_BLKNO); + goto redirect; + } + elog(ERROR, "unexpected SPGiST tuple state: %d", + innerTuple->tupstate); + } + + if (so->numberOfKeys == 0) + { + /* + * This case cannot happen at the moment, because we don't + * set pg_am.amoptionalkey for SP-GiST. In order for full + * index scans to produce correct answers, we'd need to + * index nulls, which we don't. + */ + Assert(false); + +#ifdef NOT_USED + /* + * A full index scan could be done approximately like this, + * but note that reconstruction of indexed values would be + * impossible unless the API for inner_consistent is changed. 
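+	 * (That is why the sketch below pushes entries with a dummy level of -1 and a zero reconstructedValue: neither can be maintained without the inner_consistent output.)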
+ */ + SGITITERATE(innerTuple, i, node) + { + if (ItemPointerIsValid(&node->t_tid)) + { + ScanStackEntry *newEntry = palloc(sizeof(ScanStackEntry)); + + newEntry->ptr = node->t_tid; + newEntry->level = -1; + newEntry->reconstructedValue = (Datum) 0; + so->scanStack = lcons(newEntry, so->scanStack); + } + } +#endif + } + else + { + spgInnerConsistentIn in; + spgInnerConsistentOut out; + SpGistNodeTuple *nodes; + int *andMap; + int *levelAdds; + Datum *reconstructedValues; + int j, + nMatches = 0; + MemoryContext oldCtx; + + /* use temp context for calling inner_consistent */ + oldCtx = MemoryContextSwitchTo(so->tempCxt); + + /* set up values that are the same for all scankeys */ + in.reconstructedValue = stackEntry->reconstructedValue; + in.level = stackEntry->level; + in.allTheSame = innerTuple->allTheSame; + in.hasPrefix = (innerTuple->prefixSize > 0); + in.prefixDatum = SGITDATUM(innerTuple, &so->state); + in.nNodes = innerTuple->nNodes; + in.nodeLabels = spgExtractNodeLabels(&so->state, innerTuple); + + /* collect node pointers */ + nodes = (SpGistNodeTuple *) palloc(sizeof(SpGistNodeTuple) * in.nNodes); + SGITITERATE(innerTuple, i, node) + { + nodes[i] = node; + } + + andMap = (int *) palloc0(sizeof(int) * in.nNodes); + levelAdds = (int *) palloc0(sizeof(int) * in.nNodes); + reconstructedValues = (Datum *) palloc0(sizeof(Datum) * in.nNodes); + + for (j = 0; j < so->numberOfKeys; j++) + { + in.strategy = so->keyData[j].sk_strategy; + in.query = so->keyData[j].sk_argument; + + memset(&out, 0, sizeof(out)); + + FunctionCall2Coll(&so->state.innerConsistentFn, + so->keyData[j].sk_collation, + PointerGetDatum(&in), + PointerGetDatum(&out)); + + /* If allTheSame, they should all or none of 'em match */ + if (innerTuple->allTheSame) + if (out.nNodes != 0 && out.nNodes != in.nNodes) + elog(ERROR, "inconsistent inner_consistent results for allTheSame inner tuple"); + + nMatches = 0; + for (i = 0; i < out.nNodes; i++) + { + int nodeN = out.nodeNumbers[i]; + + andMap[nodeN]++; + if (andMap[nodeN] == j + 1) + nMatches++; + if (out.levelAdds) + levelAdds[nodeN] = out.levelAdds[i]; + if (out.reconstructedValues) + reconstructedValues[nodeN] = out.reconstructedValues[i]; + } + + /* quit as soon as all nodes have failed some qual */ + if (nMatches == 0) + break; + } + + MemoryContextSwitchTo(oldCtx); + + if (nMatches > 0) + { + for (i = 0; i < in.nNodes; i++) + { + if (andMap[i] == so->numberOfKeys && + ItemPointerIsValid(&nodes[i]->t_tid)) + { + ScanStackEntry *newEntry; + + /* Create new work item for this node */ + newEntry = palloc(sizeof(ScanStackEntry)); + newEntry->ptr = nodes[i]->t_tid; + newEntry->level = stackEntry->level + levelAdds[i]; + /* Must copy value out of temp context */ + newEntry->reconstructedValue = + datumCopy(reconstructedValues[i], + so->state.attType.attbyval, + so->state.attType.attlen); + + so->scanStack = lcons(newEntry, so->scanStack); + } + } + } + } + } + + /* done with this scan stack entry */ + freeScanStackEntry(so, stackEntry); + /* clear temp context before proceeding to the next one */ + MemoryContextReset(so->tempCxt); + } + + if (buffer != InvalidBuffer) + UnlockReleaseBuffer(buffer); +} + +/* storeRes subroutine for getbitmap case */ +static void +storeBitmap(SpGistScanOpaque so, ItemPointer heapPtr, bool recheck) +{ + tbm_add_tuples(so->tbm, heapPtr, 1, recheck); + so->ntids++; +} + +Datum +spggetbitmap(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); + SpGistScanOpaque so 
= (SpGistScanOpaque) scan->opaque; + + /* Copy scankey to *so so we don't need to pass it around separately */ + so->numberOfKeys = scan->numberOfKeys; + so->keyData = scan->keyData; + + so->tbm = tbm; + so->ntids = 0; + + spgWalk(scan->indexRelation, so, true, storeBitmap); + + PG_RETURN_INT64(so->ntids); +} + +/* storeRes subroutine for gettuple case */ +static void +storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr, bool recheck) +{ + Assert(so->nPtrs < MaxIndexTuplesPerPage); + so->heapPtrs[so->nPtrs] = *heapPtr; + so->recheck[so->nPtrs] = recheck; + so->nPtrs++; +} + +Datum +spggettuple(PG_FUNCTION_ARGS) +{ + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); + SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; + + if (dir != ForwardScanDirection) + elog(ERROR, "SP-GiST only supports forward scan direction"); + + /* Copy scankey to *so so we don't need to pass it around separately */ + so->numberOfKeys = scan->numberOfKeys; + so->keyData = scan->keyData; + + for (;;) + { + if (so->iPtr < so->nPtrs) + { + /* continuing to return tuples from a leaf page */ + scan->xs_ctup.t_self = so->heapPtrs[so->iPtr]; + scan->xs_recheck = so->recheck[so->iPtr]; + so->iPtr++; + PG_RETURN_BOOL(true); + } + + so->iPtr = so->nPtrs = 0; + spgWalk(scan->indexRelation, so, false, storeGettuple); + + if (so->nPtrs == 0) + break; /* must have completed scan */ + } + + PG_RETURN_BOOL(false); +} diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c new file mode 100644 index 00000000000..b6037978425 --- /dev/null +++ b/src/backend/access/spgist/spgtextproc.c @@ -0,0 +1,594 @@ +/*------------------------------------------------------------------------- + * + * spgtextproc.c + * implementation of compressed-suffix tree over text + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgtextproc.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/spgist.h" +#include "catalog/pg_type.h" +#include "mb/pg_wchar.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/pg_locale.h" + + +/* + * In the worst case, a inner tuple in a text suffix tree could have as many + * as 256 nodes (one for each possible byte value). Each node can take 16 + * bytes on MAXALIGN=8 machines. The inner tuple must fit on an index page + * of size BLCKSZ. Rather than assuming we know the exact amount of overhead + * imposed by page headers, tuple headers, etc, we leave 100 bytes for that + * (the actual overhead should be no more than 56 bytes at this writing, so + * there is slop in this number). 
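+ * (With the default BLCKSZ of 8192, for example, this leaves
+ * 8192 - 4096 - 100 = 3996 bytes available for the prefix.)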
The upshot is that the maximum safe prefix + * length is this: + */ +#define SPGIST_MAX_PREFIX_LENGTH (BLCKSZ - 256 * 16 - 100) + +/* Struct for sorting values in picksplit */ +typedef struct spgNodePtr +{ + Datum d; + int i; + uint8 c; +} spgNodePtr; + + +Datum +spg_text_config(PG_FUNCTION_ARGS) +{ + /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ + spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); + + cfg->prefixType = TEXTOID; + cfg->labelType = CHAROID; + cfg->longValuesOK = true; /* suffixing will shorten long values */ + PG_RETURN_VOID(); +} + +/* + * Form a text datum from the given not-necessarily-null-terminated string, + * using short varlena header format if possible + */ +static Datum +formTextDatum(const char *data, int datalen) +{ + char *p; + + p = (char *) palloc(datalen + VARHDRSZ); + + if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX) + { + SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT); + if (datalen) + memcpy(p + VARHDRSZ_SHORT, data, datalen); + } + else + { + SET_VARSIZE(p, datalen + VARHDRSZ); + memcpy(p + VARHDRSZ, data, datalen); + } + + return PointerGetDatum(p); +} + +/* + * Find the length of the common prefix of a and b + */ +static int +commonPrefix(const char *a, const char *b, int lena, int lenb) +{ + int i = 0; + + while (i < lena && i < lenb && *a == *b) + { + a++; + b++; + i++; + } + + return i; +} + +/* + * Binary search an array of uint8 datums for a match to c + * + * On success, *i gets the match location; on failure, it gets where to insert + */ +static bool +searchChar(Datum *nodeLabels, int nNodes, uint8 c, int *i) +{ + int StopLow = 0, + StopHigh = nNodes; + + while (StopLow < StopHigh) + { + int StopMiddle = (StopLow + StopHigh) >> 1; + uint8 middle = DatumGetUInt8(nodeLabels[StopMiddle]); + + if (c < middle) + StopHigh = StopMiddle; + else if (c > middle) + StopLow = StopMiddle + 1; + else + { + *i = StopMiddle; + return true; + } + } + + *i = StopHigh; + return false; +} + +Datum +spg_text_choose(PG_FUNCTION_ARGS) +{ + spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); + spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); + text *inText = DatumGetTextPP(in->datum); + char *inStr = VARDATA_ANY(inText); + int inSize = VARSIZE_ANY_EXHDR(inText); + uint8 nodeChar = '\0'; + int i = 0; + int commonLen = 0; + + /* Check for prefix match, set nodeChar to first byte after prefix */ + if (in->hasPrefix) + { + text *prefixText = DatumGetTextPP(in->prefixDatum); + char *prefixStr = VARDATA_ANY(prefixText); + int prefixSize = VARSIZE_ANY_EXHDR(prefixText); + + commonLen = commonPrefix(inStr + in->level, + prefixStr, + inSize - in->level, + prefixSize); + + if (commonLen == prefixSize) + { + if (inSize - in->level > commonLen) + nodeChar = *(uint8 *) (inStr + in->level + commonLen); + else + nodeChar = '\0'; + } + else + { + /* Must split tuple because incoming value doesn't match prefix */ + out->resultType = spgSplitTuple; + + if (commonLen == 0) + { + out->result.splitTuple.prefixHasPrefix = false; + } + else + { + out->result.splitTuple.prefixHasPrefix = true; + out->result.splitTuple.prefixPrefixDatum = + formTextDatum(prefixStr, commonLen); + } + out->result.splitTuple.nodeLabel = + UInt8GetDatum(*(prefixStr + commonLen)); + + if (prefixSize - commonLen == 1) + { + out->result.splitTuple.postfixHasPrefix = false; + } + else + { + out->result.splitTuple.postfixHasPrefix = true; + out->result.splitTuple.postfixPrefixDatum = + formTextDatum(prefixStr + commonLen + 1, + prefixSize - commonLen - 1); + } + + 
PG_RETURN_VOID(); + } + } + else if (inSize > in->level) + { + nodeChar = *(uint8 *) (inStr + in->level); + } + else + { + nodeChar = '\0'; + } + + /* Look up nodeChar in the node label array */ + if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i)) + { + /* + * Descend to existing node. (If in->allTheSame, the core code will + * ignore our nodeN specification here, but that's OK. We still + * have to provide the correct levelAdd and restDatum values, and + * those are the same regardless of which node gets chosen by core.) + */ + out->resultType = spgMatchNode; + out->result.matchNode.nodeN = i; + out->result.matchNode.levelAdd = commonLen + 1; + if (inSize - in->level - commonLen - 1 > 0) + out->result.matchNode.restDatum = + formTextDatum(inStr + in->level + commonLen + 1, + inSize - in->level - commonLen - 1); + else + out->result.matchNode.restDatum = + formTextDatum(NULL, 0); + } + else if (in->allTheSame) + { + /* + * Can't use AddNode action, so split the tuple. The upper tuple + * has the same prefix as before and uses an empty node label for + * the lower tuple. The lower tuple has no prefix and the same + * node labels as the original tuple. + */ + out->resultType = spgSplitTuple; + out->result.splitTuple.prefixHasPrefix = in->hasPrefix; + out->result.splitTuple.prefixPrefixDatum = in->prefixDatum; + out->result.splitTuple.nodeLabel = UInt8GetDatum('\0'); + out->result.splitTuple.postfixHasPrefix = false; + } + else + { + /* Add a node for the not-previously-seen nodeChar value */ + out->resultType = spgAddNode; + out->result.addNode.nodeLabel = UInt8GetDatum(nodeChar); + out->result.addNode.nodeN = i; + } + + PG_RETURN_VOID(); +} + +/* qsort comparator to sort spgNodePtr structs by "c" */ +static int +cmpNodePtr(const void *a, const void *b) +{ + const spgNodePtr *aa = (const spgNodePtr *) a; + const spgNodePtr *bb = (const spgNodePtr *) b; + + if (aa->c == bb->c) + return 0; + else if (aa->c > bb->c) + return 1; + else + return -1; +} + +Datum +spg_text_picksplit(PG_FUNCTION_ARGS) +{ + spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); + spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); + text *text0 = DatumGetTextPP(in->datums[0]); + int i, + commonLen; + spgNodePtr *nodes; + + /* Identify longest common prefix, if any */ + commonLen = VARSIZE_ANY_EXHDR(text0); + for (i = 1; i < in->nTuples && commonLen > 0; i++) + { + text *texti = DatumGetTextPP(in->datums[i]); + int tmp = commonPrefix(VARDATA_ANY(text0), + VARDATA_ANY(texti), + VARSIZE_ANY_EXHDR(text0), + VARSIZE_ANY_EXHDR(texti)); + + if (tmp < commonLen) + commonLen = tmp; + } + + /* + * Limit the prefix length, if necessary, to ensure that the resulting + * inner tuple will fit on a page. + */ + commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH); + + /* Set node prefix to be that string, if it's not empty */ + if (commonLen == 0) + { + out->hasPrefix = false; + } + else + { + out->hasPrefix = true; + out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen); + } + + /* Extract the node label (first non-common byte) from each value */ + nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples); + + for (i = 0; i < in->nTuples; i++) + { + text *texti = DatumGetTextPP(in->datums[i]); + + if (commonLen < VARSIZE_ANY_EXHDR(texti)) + nodes[i].c = *(uint8 *) (VARDATA_ANY(texti) + commonLen); + else + nodes[i].c = '\0'; /* use \0 if string is all common */ + nodes[i].i = i; + nodes[i].d = in->datums[i]; + } + + /* + * Sort by label bytes so that we can group the values into nodes. 
This + * also ensures that the nodes are ordered by label value, allowing the + * use of binary search in searchChar. + */ + qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr); + + /* And emit results */ + out->nNodes = 0; + out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples); + out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples); + out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples); + + for (i = 0; i < in->nTuples; i++) + { + text *texti = DatumGetTextPP(nodes[i].d); + Datum leafD; + + if (i == 0 || nodes[i].c != nodes[i - 1].c) + { + out->nodeLabels[out->nNodes] = UInt8GetDatum(nodes[i].c); + out->nNodes++; + } + + if (commonLen < VARSIZE_ANY_EXHDR(texti)) + leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1, + VARSIZE_ANY_EXHDR(texti) - commonLen - 1); + else + leafD = formTextDatum(NULL, 0); + + out->leafTupleDatums[nodes[i].i] = leafD; + out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1; + } + + PG_RETURN_VOID(); +} + +Datum +spg_text_inner_consistent(PG_FUNCTION_ARGS) +{ + spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); + spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); + StrategyNumber strategy = in->strategy; + text *inText; + int inSize; + int i; + text *reconstrText = NULL; + int maxReconstrLen = 0; + text *prefixText = NULL; + int prefixSize = 0; + + /* + * If it's a collation-aware operator, but the collation is C, we can + * treat it as non-collation-aware. + */ + if (strategy > 10 && + lc_collate_is_c(PG_GET_COLLATION())) + strategy -= 10; + + inText = DatumGetTextPP(in->query); + inSize = VARSIZE_ANY_EXHDR(inText); + + /* + * Reconstruct values represented at this tuple, including parent data, + * prefix of this tuple if any, and the node label if any. in->level + * should be the length of the previously reconstructed value, and the + * number of bytes added here is prefixSize or prefixSize + 1. + * + * Note: we assume that in->reconstructedValue isn't toasted and doesn't + * have a short varlena header. This is okay because it must have been + * created by a previous invocation of this routine, and we always emit + * long-format reconstructed values. + */ + Assert(in->level == 0 ? DatumGetPointer(in->reconstructedValue) == NULL : + VARSIZE_ANY_EXHDR(DatumGetPointer(in->reconstructedValue)) == in->level); + + maxReconstrLen = in->level + 1; + if (in->hasPrefix) + { + prefixText = DatumGetTextPP(in->prefixDatum); + prefixSize = VARSIZE_ANY_EXHDR(prefixText); + maxReconstrLen += prefixSize; + } + + reconstrText = palloc(VARHDRSZ + maxReconstrLen); + SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen); + + if (in->level) + memcpy(VARDATA(reconstrText), + VARDATA(DatumGetPointer(in->reconstructedValue)), + in->level); + if (prefixSize) + memcpy(((char *) VARDATA(reconstrText)) + in->level, + VARDATA_ANY(prefixText), + prefixSize); + /* last byte of reconstrText will be filled in below */ + + /* + * Scan the child nodes. For each one, complete the reconstructed value + * and see if it's consistent with the query. If so, emit an entry into + * the output arrays. 
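+ * (The output arrays below are sized for the worst case of all in->nNodes
+ * children matching; out->nNodes counts how many entries are actually
+ * emitted.)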
+ */ + out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); + out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes); + out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes); + out->nNodes = 0; + + for (i = 0; i < in->nNodes; i++) + { + uint8 nodeChar = DatumGetUInt8(in->nodeLabels[i]); + int thisLen; + int r; + bool res = false; + + /* If nodeChar is zero, don't include it in data */ + if (nodeChar == '\0') + thisLen = maxReconstrLen - 1; + else + { + ((char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar; + thisLen = maxReconstrLen; + } + + r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText), + Min(inSize, thisLen)); + + switch (strategy) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + if (r <= 0) + res = true; + break; + case BTEqualStrategyNumber: + if (r == 0 && inSize >= thisLen) + res = true; + break; + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + if (r >= 0) + res = true; + break; + case BTLessStrategyNumber + 10: + case BTLessEqualStrategyNumber + 10: + case BTGreaterEqualStrategyNumber + 10: + case BTGreaterStrategyNumber + 10: + /* + * with non-C collation we need to traverse whole tree :-( + */ + res = true; + break; + default: + elog(ERROR, "unrecognized strategy number: %d", + in->strategy); + break; + } + + if (res) + { + out->nodeNumbers[out->nNodes] = i; + out->levelAdds[out->nNodes] = thisLen - in->level; + SET_VARSIZE(reconstrText, VARHDRSZ + thisLen); + out->reconstructedValues[out->nNodes] = + datumCopy(PointerGetDatum(reconstrText), false, -1); + out->nNodes++; + } + } + + PG_RETURN_VOID(); +} + +Datum +spg_text_leaf_consistent(PG_FUNCTION_ARGS) +{ + spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); + spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); + StrategyNumber strategy = in->strategy; + text *query = DatumGetTextPP(in->query); + int level = in->level; + text *leafValue, + *reconstrValue = NULL; + char *fullValue; + int fullLen; + int queryLen; + int r; + bool res; + + /* all tests are exact */ + out->recheck = false; + + leafValue = DatumGetTextPP(in->leafDatum); + + if (DatumGetPointer(in->reconstructedValue)) + reconstrValue = DatumGetTextP(in->reconstructedValue); + + Assert(level == 0 ? 
reconstrValue == NULL : + VARSIZE_ANY_EXHDR(reconstrValue) == level); + + fullLen = level + VARSIZE_ANY_EXHDR(leafValue); + + queryLen = VARSIZE_ANY_EXHDR(query); + + /* For equality, we needn't reconstruct fullValue if not same length */ + if (strategy == BTEqualStrategyNumber && queryLen != fullLen) + PG_RETURN_BOOL(false); + + /* Else, reconstruct the full string represented by this leaf tuple */ + if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0) + { + fullValue = VARDATA(reconstrValue); + } + else + { + fullValue = palloc(fullLen); + if (level) + memcpy(fullValue, VARDATA(reconstrValue), level); + if (VARSIZE_ANY_EXHDR(leafValue) > 0) + memcpy(fullValue + level, VARDATA_ANY(leafValue), + VARSIZE_ANY_EXHDR(leafValue)); + } + + /* Run the appropriate type of comparison */ + if (strategy > 10) + { + /* Collation-aware comparison */ + strategy -= 10; + + /* If asserts are enabled, verify encoding of reconstructed string */ + Assert(pg_verifymbstr(fullValue, fullLen, false)); + + r = varstr_cmp(fullValue, Min(queryLen, fullLen), + VARDATA_ANY(query), Min(queryLen, fullLen), + PG_GET_COLLATION()); + } + else + { + /* Non-collation-aware comparison */ + r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen)); + } + + if (r == 0) + { + if (queryLen > fullLen) + r = -1; + else if (queryLen < fullLen) + r = 1; + } + + switch (strategy) + { + case BTLessStrategyNumber: + res = (r < 0); + break; + case BTLessEqualStrategyNumber: + res = (r <= 0); + break; + case BTEqualStrategyNumber: + res = (r == 0); + break; + case BTGreaterEqualStrategyNumber: + res = (r >= 0); + break; + case BTGreaterStrategyNumber: + res = (r > 0); + break; + default: + elog(ERROR, "unrecognized strategy number: %d", in->strategy); + res = false; + break; + } + + PG_RETURN_BOOL(res); +} diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c new file mode 100644 index 00000000000..d6c01a5f842 --- /dev/null +++ b/src/backend/access/spgist/spgutils.c @@ -0,0 +1,850 @@ +/*------------------------------------------------------------------------- + * + * spgutils.c + * various support functions for SP-GiST + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgutils.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/reloptions.h" +#include "access/spgist_private.h" +#include "access/transam.h" +#include "access/xact.h" +#include "storage/bufmgr.h" +#include "storage/indexfsm.h" +#include "storage/lmgr.h" +#include "utils/lsyscache.h" + + +/* Fill in a SpGistTypeDesc struct with info about the specified data type */ +static void +fillTypeDesc(SpGistTypeDesc *desc, Oid type) +{ + desc->type = type; + get_typlenbyval(type, &desc->attlen, &desc->attbyval); +} + +/* Initialize SpGistState for working with the given index */ +void +initSpGistState(SpGistState *state, Relation index) +{ + Oid atttype; + spgConfigIn in; + + /* SPGiST doesn't support multi-column indexes */ + Assert(index->rd_att->natts == 1); + + /* + * Get the actual data type of the indexed column from the index tupdesc. + * We pass this to the opclass config function so that polymorphic + * opclasses are possible. 
+ */ + atttype = index->rd_att->attrs[0]->atttypid; + + /* Get the config info for the opclass */ + in.attType = atttype; + + memset(&state->config, 0, sizeof(state->config)); + + FunctionCall2Coll(index_getprocinfo(index, 1, SPGIST_CONFIG_PROC), + index->rd_indcollation[0], + PointerGetDatum(&in), + PointerGetDatum(&state->config)); + + /* Get the information we need about each relevant datatype */ + fillTypeDesc(&state->attType, atttype); + fillTypeDesc(&state->attPrefixType, state->config.prefixType); + fillTypeDesc(&state->attLabelType, state->config.labelType); + + /* Get lookup info for opclass support procs */ + fmgr_info_copy(&(state->chooseFn), + index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC), + CurrentMemoryContext); + fmgr_info_copy(&(state->picksplitFn), + index_getprocinfo(index, 1, SPGIST_PICKSPLIT_PROC), + CurrentMemoryContext); + fmgr_info_copy(&(state->innerConsistentFn), + index_getprocinfo(index, 1, SPGIST_INNER_CONSISTENT_PROC), + CurrentMemoryContext); + fmgr_info_copy(&(state->leafConsistentFn), + index_getprocinfo(index, 1, SPGIST_LEAF_CONSISTENT_PROC), + CurrentMemoryContext); + + /* Make workspace for constructing dead tuples */ + state->deadTupleStorage = palloc0(SGDTSIZE); + + /* Set XID to use in redirection tuples */ + state->myXid = GetTopTransactionIdIfAny(); + + state->isBuild = false; +} + +/* + * Allocate a new page (either by recycling, or by extending the index file). + * + * The returned buffer is already pinned and exclusive-locked. + * Caller is responsible for initializing the page by calling SpGistInitBuffer. + */ +Buffer +SpGistNewBuffer(Relation index) +{ + Buffer buffer; + bool needLock; + + /* First, try to get a page from FSM */ + for (;;) + { + BlockNumber blkno = GetFreeIndexPage(index); + + if (blkno == InvalidBlockNumber) + break; /* nothing known to FSM */ + + /* + * The root page shouldn't ever be listed in FSM, but just in case it + * is, ignore it. + */ + if (blkno == SPGIST_HEAD_BLKNO) + continue; + + buffer = ReadBuffer(index, blkno); + + /* + * We have to guard against the possibility that someone else already + * recycled this page; the buffer may be locked if so. 
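+ * (Hence the ConditionalLockBuffer call below: if we cannot get the lock
+ * immediately, we release the buffer and ask the FSM for another page.)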
+ */ + if (ConditionalLockBuffer(buffer)) + { + Page page = BufferGetPage(buffer); + + if (PageIsNew(page)) + return buffer; /* OK to use, if never initialized */ + + if (SpGistPageIsDeleted(page) || PageIsEmpty(page)) + return buffer; /* OK to use */ + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + } + + /* Can't use it, so release buffer and try again */ + ReleaseBuffer(buffer); + } + + /* Must extend the file */ + needLock = !RELATION_IS_LOCAL(index); + if (needLock) + LockRelationForExtension(index, ExclusiveLock); + + buffer = ReadBuffer(index, P_NEW); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + if (needLock) + UnlockRelationForExtension(index, ExclusiveLock); + + return buffer; +} + +/* + * Fetch local cache of lastUsedPages info, initializing it from the metapage + * if necessary + */ +static SpGistCache * +spgGetCache(Relation index) +{ + SpGistCache *cache; + + if (index->rd_amcache == NULL) + { + Buffer metabuffer; + SpGistMetaPageData *metadata; + + cache = MemoryContextAlloc(index->rd_indexcxt, + sizeof(SpGistCache)); + + metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + LockBuffer(metabuffer, BUFFER_LOCK_SHARE); + + metadata = SpGistPageGetMeta(BufferGetPage(metabuffer)); + + if (metadata->magicNumber != SPGIST_MAGIC_NUMBER) + elog(ERROR, "index \"%s\" is not an SP-GiST index", + RelationGetRelationName(index)); + + *cache = metadata->lastUsedPages; + + UnlockReleaseBuffer(metabuffer); + + index->rd_amcache = cache; + } + else + { + cache = (SpGistCache *) index->rd_amcache; + } + + return cache; +} + +/* + * Update index metapage's lastUsedPages info from local cache, if possible + * + * Updating meta page isn't critical for index working, so + * 1 use ConditionalLockBuffer to improve concurrency + * 2 don't WAL-log metabuffer changes to decrease WAL traffic + */ +void +SpGistUpdateMetaPage(Relation index) +{ + SpGistCache *cache = (SpGistCache *) index->rd_amcache; + + if (cache != NULL) + { + Buffer metabuffer; + SpGistMetaPageData *metadata; + + metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + + if (ConditionalLockBuffer(metabuffer)) + { + metadata = SpGistPageGetMeta(BufferGetPage(metabuffer)); + metadata->lastUsedPages = *cache; + + MarkBufferDirty(metabuffer); + UnlockReleaseBuffer(metabuffer); + } + else + { + ReleaseBuffer(metabuffer); + } + } +} + +/* Macro to select proper element of lastUsedPages cache depending on flags */ +#define GET_LUP(c, f) (((f) & GBUF_LEAF) ? \ + &(c)->leafPage : \ + &(c)->innerPage[(f) & GBUF_PARITY_MASK]) + +/* + * Allocate and initialize a new buffer of the type and parity specified by + * flags. The returned buffer is already pinned and exclusive-locked. + * + * When requesting an inner page, if we get one with the wrong parity, + * we just release the buffer and try again. We will get a different page + * because GetFreeIndexPage will have marked the page used in FSM. The page + * is entered in our local lastUsedPages cache, so there's some hope of + * making use of it later in this session, but otherwise we rely on VACUUM + * to eventually re-enter the page in FSM, making it available for recycling. + * Note that such a page does not get marked dirty here, so unless it's used + * fairly soon, the buffer will just get discarded and the page will remain + * as it was on disk. + * + * When we return a buffer to the caller, the page is *not* entered into + * the lastUsedPages cache; we expect the caller will do so after it's taken + * whatever space it will use. 
This is because after the caller has used up + * some space, the page might have less space than whatever was cached already + * so we'd rather not trash the old cache entry. + */ +static Buffer +allocNewBuffer(Relation index, int flags) +{ + SpGistCache *cache = spgGetCache(index); + + for (;;) + { + Buffer buffer; + + buffer = SpGistNewBuffer(index); + SpGistInitBuffer(buffer, (flags & GBUF_LEAF) ? SPGIST_LEAF : 0); + + if (flags & GBUF_LEAF) + { + /* Leaf pages have no parity concerns, so just use it */ + return buffer; + } + else + { + BlockNumber blkno = BufferGetBlockNumber(buffer); + int blkParity = blkno % 3; + + if ((flags & GBUF_PARITY_MASK) == blkParity) + { + /* Page has right parity, use it */ + return buffer; + } + else + { + /* Page has wrong parity, record it in cache and try again */ + cache->innerPage[blkParity].blkno = blkno; + cache->innerPage[blkParity].freeSpace = + PageGetExactFreeSpace(BufferGetPage(buffer)); + UnlockReleaseBuffer(buffer); + } + } + } +} + +/* + * Get a buffer of the type and parity specified by flags, having at least + * as much free space as indicated by needSpace. We use the lastUsedPages + * cache to assign the same buffer previously requested when possible. + * The returned buffer is already pinned and exclusive-locked. + * + * *isNew is set true if the page was initialized here, false if it was + * already valid. + */ +Buffer +SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew) +{ + SpGistCache *cache = spgGetCache(index); + SpGistLastUsedPage *lup; + + /* Bail out if even an empty page wouldn't meet the demand */ + if (needSpace > SPGIST_PAGE_CAPACITY) + elog(ERROR, "desired SPGiST tuple size is too big"); + + /* + * If possible, increase the space request to include relation's + * fillfactor. This ensures that when we add unrelated tuples to a page, + * we try to keep 100-fillfactor% available for adding tuples that are + * related to the ones already on it. But fillfactor mustn't cause an + * error for requests that would otherwise be legal. + */ + needSpace += RelationGetTargetPageFreeSpace(index, + SPGIST_DEFAULT_FILLFACTOR); + needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY); + + /* Get the cache entry for this flags setting */ + lup = GET_LUP(cache, flags); + + /* If we have nothing cached, just turn it over to allocNewBuffer */ + if (lup->blkno == InvalidBlockNumber) + { + *isNew = true; + return allocNewBuffer(index, flags); + } + + /* root page should never be in cache */ + Assert(lup->blkno != SPGIST_HEAD_BLKNO); + + /* If cached freeSpace isn't enough, don't bother looking at the page */ + if (lup->freeSpace >= needSpace) + { + Buffer buffer; + Page page; + + buffer = ReadBuffer(index, lup->blkno); + + if (!ConditionalLockBuffer(buffer)) + { + /* + * buffer is locked by another process, so return a new buffer + */ + ReleaseBuffer(buffer); + *isNew = true; + return allocNewBuffer(index, flags); + } + + page = BufferGetPage(buffer); + + if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page)) + { + /* OK to initialize the page */ + SpGistInitBuffer(buffer, (flags & GBUF_LEAF) ? SPGIST_LEAF : 0); + lup->freeSpace = PageGetExactFreeSpace(page) - needSpace; + *isNew = true; + return buffer; + } + + /* + * Check that page is of right type and has enough space. We must + * recheck this since our cache isn't necessarily up to date. + */ + if ((flags & GBUF_LEAF) ? 
SpGistPageIsLeaf(page) : + !SpGistPageIsLeaf(page)) + { + int freeSpace = PageGetExactFreeSpace(page); + + if (freeSpace >= needSpace) + { + /* Success, update freespace info and return the buffer */ + lup->freeSpace = freeSpace - needSpace; + *isNew = false; + return buffer; + } + } + + /* + * fallback to allocation of new buffer + */ + UnlockReleaseBuffer(buffer); + } + + /* No success with cache, so return a new buffer */ + *isNew = true; + return allocNewBuffer(index, flags); +} + +/* + * Update lastUsedPages cache when done modifying a page. + * + * We update the appropriate cache entry if it already contained this page + * (its freeSpace is likely obsolete), or if this page has more space than + * whatever we had cached. + */ +void +SpGistSetLastUsedPage(Relation index, Buffer buffer) +{ + SpGistCache *cache = spgGetCache(index); + SpGistLastUsedPage *lup; + int freeSpace; + Page page = BufferGetPage(buffer); + BlockNumber blkno = BufferGetBlockNumber(buffer); + int flags; + + /* Never enter the root page in cache, though */ + if (blkno == SPGIST_HEAD_BLKNO) + return; + + if (SpGistPageIsLeaf(page)) + flags = GBUF_LEAF; + else + flags = GBUF_INNER_PARITY(blkno); + + lup = GET_LUP(cache, flags); + + freeSpace = PageGetExactFreeSpace(page); + if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno || + lup->freeSpace < freeSpace) + { + lup->blkno = blkno; + lup->freeSpace = freeSpace; + } +} + +/* + * Initialize an SPGiST page to empty, with specified flags + */ +void +SpGistInitPage(Page page, uint16 f) +{ + SpGistPageOpaque opaque; + + PageInit(page, BLCKSZ, MAXALIGN(sizeof(SpGistPageOpaqueData))); + opaque = SpGistPageGetOpaque(page); + memset(opaque, 0, sizeof(SpGistPageOpaqueData)); + opaque->flags = f; + opaque->spgist_page_id = SPGIST_PAGE_ID; +} + +/* + * Initialize a buffer's page to empty, with specified flags + */ +void +SpGistInitBuffer(Buffer b, uint16 f) +{ + Assert(BufferGetPageSize(b) == BLCKSZ); + SpGistInitPage(BufferGetPage(b), f); +} + +/* + * Initialize metadata page + */ +void +SpGistInitMetapage(Page page) +{ + SpGistMetaPageData *metadata; + + SpGistInitPage(page, SPGIST_META); + metadata = SpGistPageGetMeta(page); + memset(metadata, 0, sizeof(SpGistMetaPageData)); + metadata->magicNumber = SPGIST_MAGIC_NUMBER; + + /* initialize last-used-page cache to empty */ + metadata->lastUsedPages.innerPage[0].blkno = InvalidBlockNumber; + metadata->lastUsedPages.innerPage[1].blkno = InvalidBlockNumber; + metadata->lastUsedPages.innerPage[2].blkno = InvalidBlockNumber; + metadata->lastUsedPages.leafPage.blkno = InvalidBlockNumber; +} + +/* + * reloptions processing for SPGiST + */ +Datum +spgoptions(PG_FUNCTION_ARGS) +{ + Datum reloptions = PG_GETARG_DATUM(0); + bool validate = PG_GETARG_BOOL(1); + bytea *result; + + result = default_reloptions(reloptions, validate, RELOPT_KIND_SPGIST); + + if (result) + PG_RETURN_BYTEA_P(result); + PG_RETURN_NULL(); +} + +/* + * Get the space needed to store a datum of the indicated type. + * Note the result is already rounded up to a MAXALIGN boundary. + * Also, we follow the SPGiST convention that pass-by-val types are + * just stored in their Datum representation (compare memcpyDatum). 
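+ * (For example, a pass-by-value node label such as the "char" labels used
+ * by the text opclass occupies MAXALIGN(sizeof(Datum)) bytes here, not just
+ * one byte.)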
+ */ +unsigned int +SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum) +{ + unsigned int size; + + if (att->attbyval) + size = sizeof(Datum); + else if (att->attlen > 0) + size = att->attlen; + else + size = VARSIZE_ANY(datum); + + return MAXALIGN(size); +} + +/* + * Copy the given datum to *target + */ +static void +memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum) +{ + unsigned int size; + + if (att->attbyval) + { + memcpy(target, &datum, sizeof(Datum)); + } + else + { + size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum); + memcpy(target, DatumGetPointer(datum), size); + } +} + +/* + * Construct a leaf tuple containing the given heap TID and datum value + */ +SpGistLeafTuple +spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr, Datum datum) +{ + SpGistLeafTuple tup; + unsigned int size; + + /* compute space needed (note result is already maxaligned) */ + size = SGLTHDRSZ + SpGistGetTypeSize(&state->attType, datum); + + /* + * Ensure that we can replace the tuple with a dead tuple later. This + * test is unnecessary given current tuple layouts, but let's be safe. + */ + if (size < SGDTSIZE) + size = SGDTSIZE; + + /* OK, form the tuple */ + tup = (SpGistLeafTuple) palloc0(size); + + tup->size = size; + tup->nextOffset = InvalidOffsetNumber; + tup->heapPtr = *heapPtr; + memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum); + + return tup; +} + +/* + * Construct a node (to go into an inner tuple) containing the given label + * + * Note that the node's downlink is just set invalid here. Caller will fill + * it in later. + */ +SpGistNodeTuple +spgFormNodeTuple(SpGistState *state, Datum label, bool isnull) +{ + SpGistNodeTuple tup; + unsigned int size; + unsigned short infomask = 0; + + /* compute space needed (note result is already maxaligned) */ + size = SGNTHDRSZ; + if (!isnull) + size += SpGistGetTypeSize(&state->attLabelType, label); + + /* + * Here we make sure that the size will fit in the field reserved for it + * in t_info. + */ + if ((size & INDEX_SIZE_MASK) != size) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("index row requires %lu bytes, maximum size is %lu", + (unsigned long) size, + (unsigned long) INDEX_SIZE_MASK))); + + tup = (SpGistNodeTuple) palloc0(size); + + if (isnull) + infomask |= INDEX_NULL_MASK; + /* we don't bother setting the INDEX_VAR_MASK bit */ + infomask |= size; + tup->t_info = infomask; + + /* The TID field will be filled in later */ + ItemPointerSetInvalid(&tup->t_tid); + + if (!isnull) + memcpyDatum(SGNTDATAPTR(tup), &state->attLabelType, label); + + return tup; +} + +/* + * Construct an inner tuple containing the given prefix and node array + */ +SpGistInnerTuple +spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix, + int nNodes, SpGistNodeTuple *nodes) +{ + SpGistInnerTuple tup; + unsigned int size; + unsigned int prefixSize; + int i; + char *ptr; + + /* Compute size needed */ + if (hasPrefix) + prefixSize = SpGistGetTypeSize(&state->attPrefixType, prefix); + else + prefixSize = 0; + + size = SGITHDRSZ + prefixSize; + + /* Note: we rely on node tuple sizes to be maxaligned already */ + for (i = 0; i < nNodes; i++) + size += IndexTupleSize(nodes[i]); + + /* + * Ensure that we can replace the tuple with a dead tuple later. This + * test is unnecessary given current tuple layouts, but let's be safe. 
+ */ + if (size < SGDTSIZE) + size = SGDTSIZE; + + /* + * Inner tuple should be small enough to fit on a page + */ + if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("SPGiST inner tuple size %lu exceeds maximum %lu", + (unsigned long) size, + (unsigned long) (SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))), + errhint("Values larger than a buffer page cannot be indexed."))); + + /* + * Check for overflow of header fields --- probably can't fail if the + * above succeeded, but let's be paranoid + */ + if (size > SGITMAXSIZE || + prefixSize > SGITMAXPREFIXSIZE || + nNodes > SGITMAXNNODES) + elog(ERROR, "SPGiST inner tuple header field is too small"); + + /* OK, form the tuple */ + tup = (SpGistInnerTuple) palloc0(size); + + tup->nNodes = nNodes; + tup->prefixSize = prefixSize; + tup->size = size; + + if (hasPrefix) + memcpyDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix); + + ptr = (char *) SGITNODEPTR(tup); + + for (i = 0; i < nNodes; i++) + { + SpGistNodeTuple node = nodes[i]; + + memcpy(ptr, node, IndexTupleSize(node)); + ptr += IndexTupleSize(node); + } + + return tup; +} + +/* + * Construct a "dead" tuple to replace a tuple being deleted. + * + * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER. + * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and + * the xid field is filled in automatically. + * + * This is called in critical sections, so we don't use palloc; the tuple + * is built in preallocated storage. It should be copied before another + * call with different parameters can occur. + */ +SpGistDeadTuple +spgFormDeadTuple(SpGistState *state, int tupstate, + BlockNumber blkno, OffsetNumber offnum) +{ + SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage; + + tuple->tupstate = tupstate; + tuple->size = SGDTSIZE; + tuple->nextOffset = InvalidOffsetNumber; + + if (tupstate == SPGIST_REDIRECT) + { + ItemPointerSet(&tuple->pointer, blkno, offnum); + tuple->xid = state->myXid; + } + else + { + ItemPointerSetInvalid(&tuple->pointer); + tuple->xid = InvalidTransactionId; + } + + return tuple; +} + +/* + * Extract the label datums of the nodes within innerTuple + * + * Returns NULL if label datums are NULLs + */ +Datum * +spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple) +{ + Datum *nodeLabels; + int nullcount = 0; + int i; + SpGistNodeTuple node; + + nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes); + SGITITERATE(innerTuple, i, node) + { + if (IndexTupleHasNulls(node)) + nullcount++; + else + nodeLabels[i] = SGNTDATUM(node, state); + } + if (nullcount == innerTuple->nNodes) + { + /* They're all null, so just return NULL */ + pfree(nodeLabels); + return NULL; + } + if (nullcount != 0) + elog(ERROR, "some but not all node labels are null in SPGiST inner tuple"); + return nodeLabels; +} + +/* + * Add a new item to the page, replacing a PLACEHOLDER item if possible. + * Return the location it's inserted at, or InvalidOffsetNumber on failure. + * + * If startOffset isn't NULL, we start searching for placeholders at + * *startOffset, and update that to the next place to search. This is just + * an optimization for repeated insertions. + * + * If errorOK is false, we throw error when there's not enough room, + * rather than returning InvalidOffsetNumber. 
+ */ +OffsetNumber +SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size, + OffsetNumber *startOffset, bool errorOK) +{ + SpGistPageOpaque opaque = SpGistPageGetOpaque(page); + OffsetNumber i, + maxoff, + offnum; + + if (opaque->nPlaceholder > 0 && + PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size)) + { + /* Try to replace a placeholder */ + maxoff = PageGetMaxOffsetNumber(page); + offnum = InvalidOffsetNumber; + + for (;;) + { + if (startOffset && *startOffset != InvalidOffsetNumber) + i = *startOffset; + else + i = FirstOffsetNumber; + for (; i <= maxoff; i++) + { + SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page, + PageGetItemId(page, i)); + + if (it->tupstate == SPGIST_PLACEHOLDER) + { + offnum = i; + break; + } + } + + /* Done if we found a placeholder */ + if (offnum != InvalidOffsetNumber) + break; + + if (startOffset && *startOffset != InvalidOffsetNumber) + { + /* Hint was no good, re-search from beginning */ + *startOffset = InvalidOffsetNumber; + continue; + } + + /* Hmm, no placeholder found? */ + opaque->nPlaceholder = 0; + break; + } + + if (offnum != InvalidOffsetNumber) + { + /* Replace the placeholder tuple */ + PageIndexTupleDelete(page, offnum); + + offnum = PageAddItem(page, item, size, offnum, false, false); + + /* + * We should not have failed given the size check at the top of + * the function, but test anyway. If we did fail, we must PANIC + * because we've already deleted the placeholder tuple, and + * there's no other way to keep the damage from getting to disk. + */ + if (offnum != InvalidOffsetNumber) + { + Assert(opaque->nPlaceholder > 0); + opaque->nPlaceholder--; + if (startOffset) + *startOffset = offnum + 1; + } + else + elog(PANIC, "failed to add item of size %u to SPGiST index page", + size); + + return offnum; + } + } + + /* No luck in replacing a placeholder, so just add it to the page */ + offnum = PageAddItem(page, item, size, + InvalidOffsetNumber, false, false); + + if (offnum == InvalidOffsetNumber && !errorOK) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + size); + + return offnum; +} diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c new file mode 100644 index 00000000000..90d59920eb6 --- /dev/null +++ b/src/backend/access/spgist/spgvacuum.c @@ -0,0 +1,755 @@ +/*------------------------------------------------------------------------- + * + * spgvacuum.c + * vacuum for SP-GiST + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgvacuum.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/spgist_private.h" +#include "access/transam.h" +#include "catalog/storage.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/indexfsm.h" +#include "storage/lmgr.h" +#include "storage/procarray.h" + + +/* local state for vacuum operations */ +typedef struct spgBulkDeleteState +{ + /* Parameters passed in to spgvacuumscan */ + IndexVacuumInfo *info; + IndexBulkDeleteResult *stats; + IndexBulkDeleteCallback callback; + void *callback_state; + /* Additional working state */ + SpGistState spgstate; + TransactionId OldestXmin; + BlockNumber lastFilledBlock; +} spgBulkDeleteState; + + +/* + * Vacuum a regular (non-root) leaf page + * + * We must delete tuples 
that are targeted for deletion by the VACUUM, + * but not move any tuples that are referenced by outside links; we assume + * those are the ones that are heads of chains. + */ +static void +vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer) +{ + Page page = BufferGetPage(buffer); + spgxlogVacuumLeaf xlrec; + XLogRecData rdata[8]; + OffsetNumber toDead[MaxIndexTuplesPerPage]; + OffsetNumber toPlaceholder[MaxIndexTuplesPerPage]; + OffsetNumber moveSrc[MaxIndexTuplesPerPage]; + OffsetNumber moveDest[MaxIndexTuplesPerPage]; + OffsetNumber chainSrc[MaxIndexTuplesPerPage]; + OffsetNumber chainDest[MaxIndexTuplesPerPage]; + OffsetNumber predecessor[MaxIndexTuplesPerPage + 1]; + bool deletable[MaxIndexTuplesPerPage + 1]; + int nDeletable; + OffsetNumber i, + max = PageGetMaxOffsetNumber(page); + + memset(predecessor, 0, sizeof(predecessor)); + memset(deletable, 0, sizeof(deletable)); + nDeletable = 0; + + /* Scan page, identify tuples to delete, accumulate stats */ + for (i = FirstOffsetNumber; i <= max; i++) + { + SpGistLeafTuple lt; + + lt = (SpGistLeafTuple) PageGetItem(page, + PageGetItemId(page, i)); + if (lt->tupstate == SPGIST_LIVE) + { + Assert(ItemPointerIsValid(<->heapPtr)); + + if (bds->callback(<->heapPtr, bds->callback_state)) + { + bds->stats->tuples_removed += 1; + deletable[i] = true; + nDeletable++; + } + else + { + bds->stats->num_index_tuples += 1; + } + + /* Form predecessor map, too */ + if (lt->nextOffset != InvalidOffsetNumber) + { + /* paranoia about corrupted chain links */ + if (lt->nextOffset < FirstOffsetNumber || + lt->nextOffset > max || + predecessor[lt->nextOffset] != InvalidOffsetNumber) + elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"", + BufferGetBlockNumber(buffer), + RelationGetRelationName(index)); + predecessor[lt->nextOffset] = i; + } + } + else + { + Assert(lt->nextOffset == InvalidOffsetNumber); + } + } + + if (nDeletable == 0) + return; /* nothing more to do */ + + /*---------- + * Figure out exactly what we have to do. We do this separately from + * actually modifying the page, mainly so that we have a representation + * that can be dumped into WAL and then the replay code can do exactly + * the same thing. The output of this step consists of six arrays + * describing four kinds of operations, to be performed in this order: + * + * toDead[]: tuple numbers to be replaced with DEAD tuples + * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples + * moveSrc[]: tuple numbers that need to be relocated to another offset + * (replacing the tuple there) and then replaced with PLACEHOLDER tuples + * moveDest[]: new locations for moveSrc tuples + * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates + * chainDest[]: new values of nextOffset for chainSrc members + * + * It's easiest to figure out what we have to do by processing tuple + * chains, so we iterate over all the tuples (not just the deletable + * ones!) to identify chain heads, then chase down each chain and make + * work item entries for deletable tuples within the chain. 
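+ * For example, if a chain runs 2 -> 5 -> 9 and only tuple 5 is deletable,
+ * then 5 is entered in toPlaceholder[] and a single chainSrc/chainDest pair
+ * (2,9) re-links tuple 2 directly to tuple 9.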
+ *---------- + */ + xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0; + + for (i = FirstOffsetNumber; i <= max; i++) + { + SpGistLeafTuple head; + bool interveningDeletable; + OffsetNumber prevLive; + OffsetNumber j; + + head = (SpGistLeafTuple) PageGetItem(page, + PageGetItemId(page, i)); + if (head->tupstate != SPGIST_LIVE) + continue; /* can't be a chain member */ + if (predecessor[i] != 0) + continue; /* not a chain head */ + + /* initialize ... */ + interveningDeletable = false; + prevLive = deletable[i] ? InvalidOffsetNumber : i; + + /* scan down the chain ... */ + j = head->nextOffset; + while (j != InvalidOffsetNumber) + { + SpGistLeafTuple lt; + + lt = (SpGistLeafTuple) PageGetItem(page, + PageGetItemId(page, j)); + if (lt->tupstate != SPGIST_LIVE) + { + /* all tuples in chain should be live */ + elog(ERROR, "unexpected SPGiST tuple state: %d", + lt->tupstate); + } + + if (deletable[j]) + { + /* This tuple should be replaced by a placeholder */ + toPlaceholder[xlrec.nPlaceholder] = j; + xlrec.nPlaceholder++; + /* previous live tuple's chain link will need an update */ + interveningDeletable = true; + } + else if (prevLive == InvalidOffsetNumber) + { + /* + * This is the first live tuple in the chain. It has + * to move to the head position. + */ + moveSrc[xlrec.nMove] = j; + moveDest[xlrec.nMove] = i; + xlrec.nMove++; + /* Chain updates will be applied after the move */ + prevLive = i; + interveningDeletable = false; + } + else + { + /* + * Second or later live tuple. Arrange to re-chain it to the + * previous live one, if there was a gap. + */ + if (interveningDeletable) + { + chainSrc[xlrec.nChain] = prevLive; + chainDest[xlrec.nChain] = j; + xlrec.nChain++; + } + prevLive = j; + interveningDeletable = false; + } + + j = lt->nextOffset; + } + + if (prevLive == InvalidOffsetNumber) + { + /* The chain is entirely removable, so we need a DEAD tuple */ + toDead[xlrec.nDead] = i; + xlrec.nDead++; + } + else if (interveningDeletable) + { + /* One or more deletions at end of chain, so close it off */ + chainSrc[xlrec.nChain] = prevLive; + chainDest[xlrec.nChain] = InvalidOffsetNumber; + xlrec.nChain++; + } + } + + /* sanity check ... 
*/ + if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove) + elog(ERROR, "inconsistent counts of deletable tuples"); + + /* Prepare WAL record */ + xlrec.node = index->rd_node; + xlrec.blkno = BufferGetBlockNumber(buffer); + STORE_STATE(&bds->spgstate, xlrec.stateSrc); + + ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0); + /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */ + ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1); + ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2); + ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3); + ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4); + ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5); + ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6); + ACCEPT_RDATA_BUFFER(buffer, 7); + + /* Do the updates */ + START_CRIT_SECTION(); + + spgPageIndexMultiDelete(&bds->spgstate, page, + toDead, xlrec.nDead, + SPGIST_DEAD, SPGIST_DEAD, + InvalidBlockNumber, InvalidOffsetNumber); + + spgPageIndexMultiDelete(&bds->spgstate, page, + toPlaceholder, xlrec.nPlaceholder, + SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, + InvalidBlockNumber, InvalidOffsetNumber); + + /* + * We implement the move step by swapping the item pointers of the + * source and target tuples, then replacing the newly-source tuples + * with placeholders. This is perhaps unduly friendly with the page + * data representation, but it's fast and doesn't risk page overflow + * when a tuple to be relocated is large. + */ + for (i = 0; i < xlrec.nMove; i++) + { + ItemId idSrc = PageGetItemId(page, moveSrc[i]); + ItemId idDest = PageGetItemId(page, moveDest[i]); + ItemIdData tmp; + + tmp = *idSrc; + *idSrc = *idDest; + *idDest = tmp; + } + + spgPageIndexMultiDelete(&bds->spgstate, page, + moveSrc, xlrec.nMove, + SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, + InvalidBlockNumber, InvalidOffsetNumber); + + for (i = 0; i < xlrec.nChain; i++) + { + SpGistLeafTuple lt; + + lt = (SpGistLeafTuple) PageGetItem(page, + PageGetItemId(page, chainSrc[i])); + Assert(lt->tupstate == SPGIST_LIVE); + lt->nextOffset = chainDest[i]; + } + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + END_CRIT_SECTION(); +} + +/* + * Vacuum the root page when it is a leaf + * + * On the root, we just delete any dead leaf tuples; no fancy business + */ +static void +vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer) +{ + Page page = BufferGetPage(buffer); + spgxlogVacuumRoot xlrec; + XLogRecData rdata[3]; + OffsetNumber toDelete[MaxIndexTuplesPerPage]; + OffsetNumber i, + max = PageGetMaxOffsetNumber(page); + + xlrec.nDelete = 0; + + /* Scan page, identify tuples to delete, accumulate stats */ + for (i = FirstOffsetNumber; i <= max; i++) + { + SpGistLeafTuple lt; + + lt = (SpGistLeafTuple) PageGetItem(page, + PageGetItemId(page, i)); + if (lt->tupstate == SPGIST_LIVE) + { + Assert(ItemPointerIsValid(<->heapPtr)); + + if (bds->callback(<->heapPtr, bds->callback_state)) + { + bds->stats->tuples_removed += 1; + toDelete[xlrec.nDelete] = i; + xlrec.nDelete++; + } + else + { + bds->stats->num_index_tuples += 1; + } + } + else + { + /* all tuples on root should be live */ + elog(ERROR, "unexpected SPGiST tuple state: %d", + lt->tupstate); + } + } + + if (xlrec.nDelete == 0) + return; /* nothing more to do */ + + /* Prepare 
WAL record */ + xlrec.node = index->rd_node; + STORE_STATE(&bds->spgstate, xlrec.stateSrc); + + ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0); + /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */ + ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1); + ACCEPT_RDATA_BUFFER(buffer, 2); + + /* Do the update */ + START_CRIT_SECTION(); + + /* The tuple numbers are in order, so we can use PageIndexMultiDelete */ + PageIndexMultiDelete(page, toDelete, xlrec.nDelete); + + MarkBufferDirty(buffer); + + if (RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + END_CRIT_SECTION(); +} + +/* + * Clean up redirect and placeholder tuples on the given page + * + * Redirect tuples can be marked placeholder once they're old enough. + * Placeholder tuples can be removed if it won't change the offsets of + * non-placeholder ones. + * + * Unlike the routines above, this works on both leaf and inner pages. + */ +static void +vacuumRedirectAndPlaceholder(Relation index, Buffer buffer, + TransactionId OldestXmin) +{ + Page page = BufferGetPage(buffer); + SpGistPageOpaque opaque = SpGistPageGetOpaque(page); + OffsetNumber i, + max = PageGetMaxOffsetNumber(page), + firstPlaceholder = InvalidOffsetNumber; + bool hasNonPlaceholder = false; + bool hasUpdate = false; + OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage]; + OffsetNumber itemnos[MaxIndexTuplesPerPage]; + spgxlogVacuumRedirect xlrec; + XLogRecData rdata[3]; + + xlrec.node = index->rd_node; + xlrec.blkno = BufferGetBlockNumber(buffer); + xlrec.nToPlaceholder = 0; + + START_CRIT_SECTION(); + + /* + * Scan backwards to convert old redirection tuples to placeholder tuples, + * and identify location of last non-placeholder tuple while at it. + */ + for (i = max; + i >= FirstOffsetNumber && + (opaque->nRedirection > 0 || !hasNonPlaceholder); + i--) + { + SpGistDeadTuple dt; + + dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i)); + + if (dt->tupstate == SPGIST_REDIRECT && + TransactionIdPrecedes(dt->xid, OldestXmin)) + { + dt->tupstate = SPGIST_PLACEHOLDER; + Assert(opaque->nRedirection > 0); + opaque->nRedirection--; + opaque->nPlaceholder++; + + ItemPointerSetInvalid(&dt->pointer); + + itemToPlaceholder[xlrec.nToPlaceholder] = i; + xlrec.nToPlaceholder++; + + hasUpdate = true; + } + + if (dt->tupstate == SPGIST_PLACEHOLDER) + { + if (!hasNonPlaceholder) + firstPlaceholder = i; + } + else + { + hasNonPlaceholder = true; + } + } + + /* + * Any placeholder tuples at the end of page can safely be removed. We + * can't remove ones before the last non-placeholder, though, because we + * can't alter the offset numbers of non-placeholder tuples. + */ + if (firstPlaceholder != InvalidOffsetNumber) + { + /* + * We do not store this array to rdata because it's easy to recreate. 
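+ * (It is just the consecutive offset numbers from firstPlaceholder up to
+ * the last offset on the page, exactly as the loop below builds it.)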
+ */ + for (i = firstPlaceholder; i <= max; i++) + itemnos[i - firstPlaceholder] = i; + + i = max - firstPlaceholder + 1; + Assert(opaque->nPlaceholder >= i); + opaque->nPlaceholder -= i; + + /* The array is surely sorted, so can use PageIndexMultiDelete */ + PageIndexMultiDelete(page, itemnos, i); + + hasUpdate = true; + } + + xlrec.firstPlaceholder = firstPlaceholder; + + if (hasUpdate) + MarkBufferDirty(buffer); + + if (hasUpdate && RelationNeedsWAL(index)) + { + XLogRecPtr recptr; + + ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0); + ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1); + ACCEPT_RDATA_BUFFER(buffer, 2); + + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata); + + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + } + + END_CRIT_SECTION(); +} + +/* + * Process one page during a bulkdelete scan + */ +static void +spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno) +{ + Relation index = bds->info->index; + Buffer buffer; + Page page; + + /* call vacuum_delay_point while not holding any buffer lock */ + vacuum_delay_point(); + + buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, + RBM_NORMAL, bds->info->strategy); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = (Page) BufferGetPage(buffer); + + if (PageIsNew(page)) + { + /* + * We found an all-zero page, which could happen if the database + * crashed just after extending the file. Initialize and recycle it. + */ + SpGistInitBuffer(buffer, 0); + SpGistPageSetDeleted(page); + /* We don't bother to WAL-log this action; easy to redo */ + MarkBufferDirty(buffer); + } + else if (SpGistPageIsDeleted(page)) + { + /* nothing to do */ + } + else if (SpGistPageIsLeaf(page)) + { + if (blkno == SPGIST_HEAD_BLKNO) + { + vacuumLeafRoot(bds, index, buffer); + /* no need for vacuumRedirectAndPlaceholder */ + } + else + { + vacuumLeafPage(bds, index, buffer); + vacuumRedirectAndPlaceholder(index, buffer, bds->OldestXmin); + } + } + else + { + /* inner page */ + vacuumRedirectAndPlaceholder(index, buffer, bds->OldestXmin); + } + + /* + * The root page must never be deleted, nor marked as available in FSM, + * because we don't want it ever returned by a search for a place to + * put a new tuple. Otherwise, check for empty/deletable page, and + * make sure FSM knows about it. 
+ */ + if (blkno != SPGIST_HEAD_BLKNO) + { + /* If page is now empty, mark it deleted */ + if (PageIsEmpty(page) && !SpGistPageIsDeleted(page)) + { + SpGistPageSetDeleted(page); + /* We don't bother to WAL-log this action; easy to redo */ + MarkBufferDirty(buffer); + } + + if (SpGistPageIsDeleted(page)) + { + RecordFreeIndexPage(index, blkno); + bds->stats->pages_deleted++; + } + else + bds->lastFilledBlock = blkno; + } + + SpGistSetLastUsedPage(index, buffer); + + UnlockReleaseBuffer(buffer); +} + +/* + * Perform a bulkdelete scan + */ +static void +spgvacuumscan(spgBulkDeleteState *bds) +{ + Relation index = bds->info->index; + bool needLock; + BlockNumber num_pages, + blkno; + + /* Finish setting up spgBulkDeleteState */ + initSpGistState(&bds->spgstate, index); + bds->OldestXmin = GetOldestXmin(true, false); + bds->lastFilledBlock = SPGIST_HEAD_BLKNO; + + /* + * Reset counts that will be incremented during the scan; needed in case + * of multiple scans during a single VACUUM command + */ + bds->stats->estimated_count = false; + bds->stats->num_index_tuples = 0; + bds->stats->pages_deleted = 0; + + /* We can skip locking for new or temp relations */ + needLock = !RELATION_IS_LOCAL(index); + + /* + * The outer loop iterates over all index pages except the metapage, in + * physical order (we hope the kernel will cooperate in providing + * read-ahead for speed). It is critical that we visit all leaf pages, + * including ones added after we start the scan, else we might fail to + * delete some deletable tuples. See more extensive comments about + * this in btvacuumscan(). + */ + blkno = SPGIST_HEAD_BLKNO; + for (;;) + { + /* Get the current relation length */ + if (needLock) + LockRelationForExtension(index, ExclusiveLock); + num_pages = RelationGetNumberOfBlocks(index); + if (needLock) + UnlockRelationForExtension(index, ExclusiveLock); + + /* Quit if we've scanned the whole relation */ + if (blkno >= num_pages) + break; + /* Iterate over pages, then loop back to recheck length */ + for (; blkno < num_pages; blkno++) + { + spgvacuumpage(bds, blkno); + } + } + + /* Propagate local lastUsedPage cache to metablock */ + SpGistUpdateMetaPage(index); + + /* + * Truncate index if possible + * + * XXX disabled because it's unsafe due to possible concurrent inserts. + * We'd have to rescan the pages to make sure they're still empty, and it + * doesn't seem worth it. Note that btree doesn't do this either. + */ +#ifdef NOT_USED + if (num_pages > bds->lastFilledBlock + 1) + { + BlockNumber lastBlock = num_pages - 1; + + num_pages = bds->lastFilledBlock + 1; + RelationTruncate(index, num_pages); + bds->stats->pages_removed += lastBlock - bds->lastFilledBlock; + bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock; + } +#endif + + /* Report final stats */ + bds->stats->num_pages = num_pages; + bds->stats->pages_free = bds->stats->pages_deleted; +} + +/* + * Bulk deletion of all index entries pointing to a set of heap tuples. + * The set of target tuples is specified via a callback routine that tells + * whether any given heap tuple (identified by ItemPointer) is being deleted. + * + * Result: a palloc'd struct containing statistical info for VACUUM displays. 
+ */ +Datum +spgbulkdelete(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2); + void *callback_state = (void *) PG_GETARG_POINTER(3); + spgBulkDeleteState bds; + + /* allocate stats if first time through, else re-use existing struct */ + if (stats == NULL) + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + bds.info = info; + bds.stats = stats; + bds.callback = callback; + bds.callback_state = callback_state; + + spgvacuumscan(&bds); + + PG_RETURN_POINTER(stats); +} + +/* Dummy callback to delete no tuples during spgvacuumcleanup */ +static bool +dummy_callback(ItemPointer itemptr, void *state) +{ + return false; +} + +/* + * Post-VACUUM cleanup. + * + * Result: a palloc'd struct containing statistical info for VACUUM displays. + */ +Datum +spgvacuumcleanup(PG_FUNCTION_ARGS) +{ + IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1); + Relation index = info->index; + spgBulkDeleteState bds; + + /* No-op in ANALYZE ONLY mode */ + if (info->analyze_only) + PG_RETURN_POINTER(stats); + + /* + * We don't need to scan the index if there was a preceding bulkdelete + * pass. Otherwise, make a pass that won't delete any live tuples, but + * might still accomplish useful stuff with redirect/placeholder cleanup, + * and in any case will provide stats. + */ + if (stats == NULL) + { + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + bds.info = info; + bds.stats = stats; + bds.callback = dummy_callback; + bds.callback_state = NULL; + + spgvacuumscan(&bds); + } + + /* Finally, vacuum the FSM */ + IndexFreeSpaceMapVacuum(index); + + /* + * It's quite possible for us to be fooled by concurrent page splits into + * double-counting some index tuples, so disbelieve any total that exceeds + * the underlying heap's count ... if we know that accurately. Otherwise + * this might just make matters worse. + */ + if (!info->estimated_count) + { + if (stats->num_index_tuples > info->num_heap_tuples) + stats->num_index_tuples = info->num_heap_tuples; + } + + PG_RETURN_POINTER(stats); +} diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c new file mode 100644 index 00000000000..e508f09703d --- /dev/null +++ b/src/backend/access/spgist/spgxlog.c @@ -0,0 +1,1070 @@ +/*------------------------------------------------------------------------- + * + * spgxlog.c + * WAL replay logic for SP-GiST + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgxlog.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/spgist_private.h" +#include "access/xlogutils.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" + + +static MemoryContext opCtx; /* working memory for operations */ + + +/* + * Prepare a dummy SpGistState, with just the minimum info needed for replay. + * + * At present, all we need is enough info to support spgFormDeadTuple(), + * plus the isBuild flag. 
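 *
 * The insert side is expected to capture that info in each WAL record's
 * stateSrc field via the STORE_STATE macro in spgist_private.h; roughly,
 * for a record type that carries a stateSrc (sketch only, "state" being
 * the active SpGistState pointer):
 *
 *      spgxlogMoveLeafs xlrec;
 *
 *      STORE_STATE(state, xlrec.stateSrc);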
+ */ +static void +fillFakeState(SpGistState *state, spgxlogState stateSrc) +{ + memset(state, 0, sizeof(*state)); + + state->myXid = stateSrc.myXid; + state->isBuild = stateSrc.isBuild; + state->deadTupleStorage = palloc0(SGDTSIZE); +} + +/* + * Add a leaf tuple, or replace an existing placeholder tuple. This is used + * to replay SpGistPageAddNewItem() operations. If the offset points at an + * existing tuple, it had better be a placeholder tuple. + */ +static void +addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset) +{ + if (offset <= PageGetMaxOffsetNumber(page)) + { + SpGistDeadTuple dt = (SpGistDeadTuple) PageGetItem(page, + PageGetItemId(page, offset)); + + if (dt->tupstate != SPGIST_PLACEHOLDER) + elog(ERROR, "SPGiST tuple to be replaced is not a placeholder"); + + Assert(SpGistPageGetOpaque(page)->nPlaceholder > 0); + SpGistPageGetOpaque(page)->nPlaceholder--; + + PageIndexTupleDelete(page, offset); + } + + Assert(offset <= PageGetMaxOffsetNumber(page) + 1); + + if (PageAddItem(page, tuple, size, offset, false, false) != offset) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + size); +} + +static void +spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record) +{ + RelFileNode *node = (RelFileNode *) XLogRecGetData(record); + Buffer buffer; + Page page; + + buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true); + Assert(BufferIsValid(buffer)); + page = (Page) BufferGetPage(buffer); + SpGistInitMetapage(page); + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + + buffer = XLogReadBuffer(*node, SPGIST_HEAD_BLKNO, true); + Assert(BufferIsValid(buffer)); + SpGistInitBuffer(buffer, SPGIST_LEAF); + page = (Page) BufferGetPage(buffer); + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); +} + +static void +spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr; + SpGistLeafTuple leafTuple; + Buffer buffer; + Page page; + + /* we assume this is adequately aligned */ + ptr += sizeof(spgxlogAddLeaf); + leafTuple = (SpGistLeafTuple) ptr; + + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, + xldata->newPage); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + + if (xldata->newPage) + SpGistInitBuffer(buffer, SPGIST_LEAF); + + if (!XLByteLE(lsn, PageGetLSN(page))) + { + /* insert new tuple */ + if (xldata->offnumLeaf != xldata->offnumHeadLeaf) + { + /* normal cases, tuple was added by SpGistPageAddNewItem */ + addOrReplaceTuple(page, (Item) leafTuple, leafTuple->size, + xldata->offnumLeaf); + + /* update head tuple's chain link if needed */ + if (xldata->offnumHeadLeaf != InvalidOffsetNumber) + { + SpGistLeafTuple head; + + head = (SpGistLeafTuple) PageGetItem(page, + PageGetItemId(page, xldata->offnumHeadLeaf)); + Assert(head->nextOffset == leafTuple->nextOffset); + head->nextOffset = xldata->offnumLeaf; + } + } + else + { + /* replacing a DEAD tuple */ + PageIndexTupleDelete(page, xldata->offnumLeaf); + if (PageAddItem(page, + (Item) leafTuple, leafTuple->size, + xldata->offnumLeaf, false, false) != xldata->offnumLeaf) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + leafTuple->size); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + + /* update parent 
downlink if necessary */ + if (xldata->blknoParent != InvalidBlockNumber && + !(record->xl_info & XLR_BKP_BLOCK_2)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + SpGistInnerTuple tuple; + + tuple = (SpGistInnerTuple) PageGetItem(page, + PageGetItemId(page, xldata->offnumParent)); + + updateNodeLink(tuple, xldata->nodeI, + xldata->blknoLeaf, xldata->offnumLeaf); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } +} + +static void +spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr; + SpGistState state; + OffsetNumber *toDelete; + OffsetNumber *toInsert; + int nInsert; + Buffer buffer; + Page page; + + fillFakeState(&state, xldata->stateSrc); + + nInsert = xldata->replaceDead ? 1 : xldata->nMoves + 1; + + ptr += MAXALIGN(sizeof(spgxlogMoveLeafs)); + toDelete = (OffsetNumber *) ptr; + ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nMoves); + toInsert = (OffsetNumber *) ptr; + ptr += MAXALIGN(sizeof(OffsetNumber) * nInsert); + + /* now ptr points to the list of leaf tuples */ + + /* Insert tuples on the dest page (do first, so redirect is valid) */ + if (!(record->xl_info & XLR_BKP_BLOCK_2)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, + xldata->newPage); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + + if (xldata->newPage) + SpGistInitBuffer(buffer, SPGIST_LEAF); + + if (!XLByteLE(lsn, PageGetLSN(page))) + { + int i; + + for (i = 0; i < nInsert; i++) + { + SpGistLeafTuple lt = (SpGistLeafTuple) ptr; + + addOrReplaceTuple(page, (Item) lt, lt->size, toInsert[i]); + ptr += lt->size; + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + + /* Delete tuples from the source page, inserting a redirection pointer */ + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves, + state.isBuild ? 
SPGIST_PLACEHOLDER : SPGIST_REDIRECT, + SPGIST_PLACEHOLDER, + xldata->blknoDst, + toInsert[nInsert - 1]); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + + /* And update the parent downlink */ + if (!(record->xl_info & XLR_BKP_BLOCK_3)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + SpGistInnerTuple tuple; + + tuple = (SpGistInnerTuple) PageGetItem(page, + PageGetItemId(page, xldata->offnumParent)); + + updateNodeLink(tuple, xldata->nodeI, + xldata->blknoDst, toInsert[nInsert - 1]); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } +} + +static void +spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogAddNode *xldata = (spgxlogAddNode *) ptr; + SpGistInnerTuple innerTuple; + SpGistState state; + Buffer buffer; + Page page; + int bbi; + + /* we assume this is adequately aligned */ + ptr += sizeof(spgxlogAddNode); + innerTuple = (SpGistInnerTuple) ptr; + + fillFakeState(&state, xldata->stateSrc); + + if (xldata->blknoNew == InvalidBlockNumber) + { + /* update in place */ + Assert(xldata->blknoParent == InvalidBlockNumber); + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + PageIndexTupleDelete(page, xldata->offnum); + if (PageAddItem(page, (Item) innerTuple, innerTuple->size, + xldata->offnum, + false, false) != xldata->offnum) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + innerTuple->size); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + } + else + { + /* Install new tuple first so redirect is valid */ + if (!(record->xl_info & XLR_BKP_BLOCK_2)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, + xldata->newPage); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + + if (xldata->newPage) + SpGistInitBuffer(buffer, 0); + + if (!XLByteLE(lsn, PageGetLSN(page))) + { + addOrReplaceTuple(page, (Item) innerTuple, + innerTuple->size, xldata->offnumNew); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + + /* Delete old tuple, replacing it with redirect or placeholder tuple */ + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + SpGistDeadTuple dt; + + if (state.isBuild) + dt = spgFormDeadTuple(&state, SPGIST_PLACEHOLDER, + InvalidBlockNumber, + InvalidOffsetNumber); + else + dt = spgFormDeadTuple(&state, SPGIST_REDIRECT, + xldata->blknoNew, + xldata->offnumNew); + + PageIndexTupleDelete(page, xldata->offnum); + if (PageAddItem(page, (Item) dt, dt->size, + xldata->offnum, + false, false) != xldata->offnum) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + dt->size); + + if (state.isBuild) + SpGistPageGetOpaque(page)->nPlaceholder++; + else + SpGistPageGetOpaque(page)->nRedirection++; + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + 
UnlockReleaseBuffer(buffer); + } + } + + /* + * Update parent downlink. Since parent could be in either of the + * previous two buffers, it's a bit tricky to determine which BKP bit + * applies. + */ + if (xldata->blknoParent == xldata->blkno) + bbi = 0; + else if (xldata->blknoParent == xldata->blknoNew) + bbi = 1; + else + bbi = 2; + + if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + SpGistInnerTuple innerTuple; + + innerTuple = (SpGistInnerTuple) PageGetItem(page, + PageGetItemId(page, xldata->offnumParent)); + + updateNodeLink(innerTuple, xldata->nodeI, + xldata->blknoNew, xldata->offnumNew); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + } +} + +static void +spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr; + SpGistInnerTuple prefixTuple; + SpGistInnerTuple postfixTuple; + Buffer buffer; + Page page; + + /* we assume this is adequately aligned */ + ptr += sizeof(spgxlogSplitTuple); + prefixTuple = (SpGistInnerTuple) ptr; + ptr += prefixTuple->size; + postfixTuple = (SpGistInnerTuple) ptr; + + /* insert postfix tuple first to avoid dangling link */ + if (xldata->blknoPostfix != xldata->blknoPrefix && + !(record->xl_info & XLR_BKP_BLOCK_2)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, + xldata->newPage); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + + if (xldata->newPage) + SpGistInitBuffer(buffer, 0); + + if (!XLByteLE(lsn, PageGetLSN(page))) + { + addOrReplaceTuple(page, (Item) postfixTuple, + postfixTuple->size, xldata->offnumPostfix); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + + /* now handle the original page */ + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blknoPrefix, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + PageIndexTupleDelete(page, xldata->offnumPrefix); + if (PageAddItem(page, (Item) prefixTuple, prefixTuple->size, + xldata->offnumPrefix, false, false) != xldata->offnumPrefix) + elog(ERROR, "failed to add item of size %u to SPGiST index page", + prefixTuple->size); + + if (xldata->blknoPostfix == xldata->blknoPrefix) + addOrReplaceTuple(page, (Item) postfixTuple, + postfixTuple->size, + xldata->offnumPostfix); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } +} + +static void +spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr; + SpGistInnerTuple innerTuple; + SpGistState state; + OffsetNumber *toDelete; + OffsetNumber *toInsert; + uint8 *leafPageSelect; + Buffer srcBuffer; + Buffer destBuffer; + Page page; + int bbi; + int i; + + fillFakeState(&state, xldata->stateSrc); + + ptr += MAXALIGN(sizeof(spgxlogPickSplit)); + innerTuple = (SpGistInnerTuple) ptr; + ptr += innerTuple->size; + toDelete = (OffsetNumber *) ptr; + ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nDelete); + toInsert = (OffsetNumber *) ptr; + ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nInsert); + 
leafPageSelect = (uint8 *) ptr; + ptr += MAXALIGN(sizeof(uint8) * xldata->nInsert); + + /* now ptr points to the list of leaf tuples */ + + /* + * It's a bit tricky to identify which pages have been handled as + * full-page images, so we explicitly count each referenced buffer. + */ + bbi = 0; + + if (xldata->blknoSrc == SPGIST_HEAD_BLKNO) + { + /* when splitting root, we touch it only in the guise of new inner */ + srcBuffer = InvalidBuffer; + } + else if (xldata->initSrc) + { + /* just re-init the source page */ + srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true); + Assert(BufferIsValid(srcBuffer)); + page = (Page) BufferGetPage(srcBuffer); + + SpGistInitBuffer(srcBuffer, SPGIST_LEAF); + /* don't update LSN etc till we're done with it */ + } + else + { + /* delete the specified tuples from source page */ + if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) + { + srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, false); + if (BufferIsValid(srcBuffer)) + { + page = BufferGetPage(srcBuffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + /* + * We have it a bit easier here than in doPickSplit(), + * because we know the inner tuple's location already, + * so we can inject the correct redirection tuple now. + */ + if (!state.isBuild) + spgPageIndexMultiDelete(&state, page, + toDelete, xldata->nDelete, + SPGIST_REDIRECT, + SPGIST_PLACEHOLDER, + xldata->blknoInner, + xldata->offnumInner); + else + spgPageIndexMultiDelete(&state, page, + toDelete, xldata->nDelete, + SPGIST_PLACEHOLDER, + SPGIST_PLACEHOLDER, + InvalidBlockNumber, + InvalidOffsetNumber); + + /* don't update LSN etc till we're done with it */ + } + } + } + else + srcBuffer = InvalidBuffer; + bbi++; + } + + /* try to access dest page if any */ + if (xldata->blknoDest == InvalidBlockNumber) + { + destBuffer = InvalidBuffer; + } + else if (xldata->initDest) + { + /* just re-init the dest page */ + destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true); + Assert(BufferIsValid(destBuffer)); + page = (Page) BufferGetPage(destBuffer); + + SpGistInitBuffer(destBuffer, SPGIST_LEAF); + /* don't update LSN etc till we're done with it */ + } + else + { + if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) + destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, false); + else + destBuffer = InvalidBuffer; + bbi++; + } + + /* restore leaf tuples to src and/or dest page */ + for (i = 0; i < xldata->nInsert; i++) + { + SpGistLeafTuple lt = (SpGistLeafTuple) ptr; + Buffer leafBuffer; + + ptr += lt->size; + + leafBuffer = leafPageSelect[i] ? 
destBuffer : srcBuffer; + if (!BufferIsValid(leafBuffer)) + continue; /* no need to touch this page */ + page = BufferGetPage(leafBuffer); + + if (!XLByteLE(lsn, PageGetLSN(page))) + { + addOrReplaceTuple(page, (Item) lt, lt->size, toInsert[i]); + } + } + + /* Now update src and dest page LSNs */ + if (BufferIsValid(srcBuffer)) + { + page = BufferGetPage(srcBuffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(srcBuffer); + } + UnlockReleaseBuffer(srcBuffer); + } + if (BufferIsValid(destBuffer)) + { + page = BufferGetPage(destBuffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(destBuffer); + } + UnlockReleaseBuffer(destBuffer); + } + + /* restore new inner tuple */ + if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) + { + Buffer buffer = XLogReadBuffer(xldata->node, xldata->blknoInner, + xldata->initInner); + + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + + if (xldata->initInner) + SpGistInitBuffer(buffer, 0); + + if (!XLByteLE(lsn, PageGetLSN(page))) + { + addOrReplaceTuple(page, (Item) innerTuple, innerTuple->size, + xldata->offnumInner); + + /* if inner is also parent, update link while we're here */ + if (xldata->blknoInner == xldata->blknoParent) + { + SpGistInnerTuple parent; + + parent = (SpGistInnerTuple) PageGetItem(page, + PageGetItemId(page, xldata->offnumParent)); + updateNodeLink(parent, xldata->nodeI, + xldata->blknoInner, xldata->offnumInner); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + bbi++; + + /* update parent downlink, unless we did it above */ + if (xldata->blknoParent == InvalidBlockNumber) + { + /* no parent cause we split the root */ + Assert(xldata->blknoInner == SPGIST_HEAD_BLKNO); + } + else if (xldata->blknoInner != xldata->blknoParent) + { + if (!(record->xl_info & XLR_SET_BKP_BLOCK(bbi))) + { + Buffer buffer = XLogReadBuffer(xldata->node, xldata->blknoParent, false); + + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + + if (!XLByteLE(lsn, PageGetLSN(page))) + { + SpGistInnerTuple parent; + + parent = (SpGistInnerTuple) PageGetItem(page, + PageGetItemId(page, xldata->offnumParent)); + updateNodeLink(parent, xldata->nodeI, + xldata->blknoInner, xldata->offnumInner); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } + } +} + +static void +spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr; + OffsetNumber *toDead; + OffsetNumber *toPlaceholder; + OffsetNumber *moveSrc; + OffsetNumber *moveDest; + OffsetNumber *chainSrc; + OffsetNumber *chainDest; + SpGistState state; + Buffer buffer; + Page page; + int i; + + fillFakeState(&state, xldata->stateSrc); + + ptr += sizeof(spgxlogVacuumLeaf); + toDead = (OffsetNumber *) ptr; + ptr += sizeof(OffsetNumber) * xldata->nDead; + toPlaceholder = (OffsetNumber *) ptr; + ptr += sizeof(OffsetNumber) * xldata->nPlaceholder; + moveSrc = (OffsetNumber *) ptr; + ptr += sizeof(OffsetNumber) * xldata->nMove; + moveDest = (OffsetNumber *) ptr; + ptr += sizeof(OffsetNumber) * xldata->nMove; + chainSrc = (OffsetNumber *) ptr; + ptr += sizeof(OffsetNumber) * xldata->nChain; + chainDest = (OffsetNumber *) ptr; + + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = 
XLogReadBuffer(xldata->node, xldata->blkno, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + spgPageIndexMultiDelete(&state, page, + toDead, xldata->nDead, + SPGIST_DEAD, SPGIST_DEAD, + InvalidBlockNumber, + InvalidOffsetNumber); + + spgPageIndexMultiDelete(&state, page, + toPlaceholder, xldata->nPlaceholder, + SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, + InvalidBlockNumber, + InvalidOffsetNumber); + + /* see comments in vacuumLeafPage() */ + for (i = 0; i < xldata->nMove; i++) + { + ItemId idSrc = PageGetItemId(page, moveSrc[i]); + ItemId idDest = PageGetItemId(page, moveDest[i]); + ItemIdData tmp; + + tmp = *idSrc; + *idSrc = *idDest; + *idDest = tmp; + } + + spgPageIndexMultiDelete(&state, page, + moveSrc, xldata->nMove, + SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, + InvalidBlockNumber, + InvalidOffsetNumber); + + for (i = 0; i < xldata->nChain; i++) + { + SpGistLeafTuple lt; + + lt = (SpGistLeafTuple) PageGetItem(page, + PageGetItemId(page, chainSrc[i])); + Assert(lt->tupstate == SPGIST_LIVE); + lt->nextOffset = chainDest[i]; + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } +} + +static void +spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr; + OffsetNumber *toDelete; + Buffer buffer; + Page page; + + ptr += sizeof(spgxlogVacuumRoot); + toDelete = (OffsetNumber *) ptr; + + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = XLogReadBuffer(xldata->node, SPGIST_HEAD_BLKNO, false); + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + /* The tuple numbers are in order */ + PageIndexMultiDelete(page, toDelete, xldata->nDelete); + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + UnlockReleaseBuffer(buffer); + } + } +} + +static void +spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record) +{ + char *ptr = XLogRecGetData(record); + spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr; + OffsetNumber *itemToPlaceholder; + Buffer buffer; + Page page; + + ptr += sizeof(spgxlogVacuumRedirect); + itemToPlaceholder = (OffsetNumber *) ptr; + + if (!(record->xl_info & XLR_BKP_BLOCK_1)) + { + buffer = XLogReadBuffer(xldata->node, xldata->blkno, false); + + if (BufferIsValid(buffer)) + { + page = BufferGetPage(buffer); + if (!XLByteLE(lsn, PageGetLSN(page))) + { + SpGistPageOpaque opaque = SpGistPageGetOpaque(page); + int i; + + /* Convert redirect pointers to plain placeholders */ + for (i = 0; i < xldata->nToPlaceholder; i++) + { + SpGistDeadTuple dt; + + dt = (SpGistDeadTuple) PageGetItem(page, + PageGetItemId(page, itemToPlaceholder[i])); + Assert(dt->tupstate == SPGIST_REDIRECT); + dt->tupstate = SPGIST_PLACEHOLDER; + ItemPointerSetInvalid(&dt->pointer); + } + + Assert(opaque->nRedirection >= xldata->nToPlaceholder); + opaque->nRedirection -= xldata->nToPlaceholder; + opaque->nPlaceholder += xldata->nToPlaceholder; + + /* Remove placeholder tuples at end of page */ + if (xldata->firstPlaceholder != InvalidOffsetNumber) + { + int max = PageGetMaxOffsetNumber(page); + OffsetNumber *toDelete; + + toDelete = palloc(sizeof(OffsetNumber) * max); + + for (i = xldata->firstPlaceholder; i <= max; i++) + toDelete[i - xldata->firstPlaceholder] = i; + + i = max - xldata->firstPlaceholder + 1; + Assert(opaque->nPlaceholder >= i); + 
opaque->nPlaceholder -= i; + + /* The array is sorted, so can use PageIndexMultiDelete */ + PageIndexMultiDelete(page, toDelete, i); + + pfree(toDelete); + } + + PageSetLSN(page, lsn); + PageSetTLI(page, ThisTimeLineID); + MarkBufferDirty(buffer); + } + + UnlockReleaseBuffer(buffer); + } + } +} + +void +spg_redo(XLogRecPtr lsn, XLogRecord *record) +{ + uint8 info = record->xl_info & ~XLR_INFO_MASK; + MemoryContext oldCxt; + + /* + * SP-GiST indexes do not require any conflict processing. NB: If we ever + * implement a similar optimization as we have in b-tree, and remove + * killed tuples outside VACUUM, we'll need to handle that here. + */ + RestoreBkpBlocks(lsn, record, false); + + oldCxt = MemoryContextSwitchTo(opCtx); + switch (info) + { + case XLOG_SPGIST_CREATE_INDEX: + spgRedoCreateIndex(lsn, record); + break; + case XLOG_SPGIST_ADD_LEAF: + spgRedoAddLeaf(lsn, record); + break; + case XLOG_SPGIST_MOVE_LEAFS: + spgRedoMoveLeafs(lsn, record); + break; + case XLOG_SPGIST_ADD_NODE: + spgRedoAddNode(lsn, record); + break; + case XLOG_SPGIST_SPLIT_TUPLE: + spgRedoSplitTuple(lsn, record); + break; + case XLOG_SPGIST_PICKSPLIT: + spgRedoPickSplit(lsn, record); + break; + case XLOG_SPGIST_VACUUM_LEAF: + spgRedoVacuumLeaf(lsn, record); + break; + case XLOG_SPGIST_VACUUM_ROOT: + spgRedoVacuumRoot(lsn, record); + break; + case XLOG_SPGIST_VACUUM_REDIRECT: + spgRedoVacuumRedirect(lsn, record); + break; + default: + elog(PANIC, "spg_redo: unknown op code %u", info); + } + + MemoryContextSwitchTo(oldCxt); + MemoryContextReset(opCtx); +} + +static void +out_target(StringInfo buf, RelFileNode node) +{ + appendStringInfo(buf, "rel %u/%u/%u ", + node.spcNode, node.dbNode, node.relNode); +} + +void +spg_desc(StringInfo buf, uint8 xl_info, char *rec) +{ + uint8 info = xl_info & ~XLR_INFO_MASK; + + switch (info) + { + case XLOG_SPGIST_CREATE_INDEX: + appendStringInfo(buf, "create_index: rel %u/%u/%u", + ((RelFileNode *) rec)->spcNode, + ((RelFileNode *) rec)->dbNode, + ((RelFileNode *) rec)->relNode); + break; + case XLOG_SPGIST_ADD_LEAF: + out_target(buf, ((spgxlogAddLeaf *) rec)->node); + appendStringInfo(buf, "add leaf to page: %u", + ((spgxlogAddLeaf *) rec)->blknoLeaf); + break; + case XLOG_SPGIST_MOVE_LEAFS: + out_target(buf, ((spgxlogMoveLeafs *) rec)->node); + appendStringInfo(buf, "move %u leafs from page %u to page %u", + ((spgxlogMoveLeafs *) rec)->nMoves, + ((spgxlogMoveLeafs *) rec)->blknoSrc, + ((spgxlogMoveLeafs *) rec)->blknoDst); + break; + case XLOG_SPGIST_ADD_NODE: + out_target(buf, ((spgxlogAddNode *) rec)->node); + appendStringInfo(buf, "add node to %u:%u", + ((spgxlogAddNode *) rec)->blkno, + ((spgxlogAddNode *) rec)->offnum); + break; + case XLOG_SPGIST_SPLIT_TUPLE: + out_target(buf, ((spgxlogSplitTuple *) rec)->node); + appendStringInfo(buf, "split node %u:%u to %u:%u", + ((spgxlogSplitTuple *) rec)->blknoPrefix, + ((spgxlogSplitTuple *) rec)->offnumPrefix, + ((spgxlogSplitTuple *) rec)->blknoPostfix, + ((spgxlogSplitTuple *) rec)->offnumPostfix); + break; + case XLOG_SPGIST_PICKSPLIT: + out_target(buf, ((spgxlogPickSplit *) rec)->node); + appendStringInfo(buf, "split leaf page"); + break; + case XLOG_SPGIST_VACUUM_LEAF: + out_target(buf, ((spgxlogVacuumLeaf *) rec)->node); + appendStringInfo(buf, "vacuum leaf tuples on page %u", + ((spgxlogVacuumLeaf *) rec)->blkno); + break; + case XLOG_SPGIST_VACUUM_ROOT: + out_target(buf, ((spgxlogVacuumRoot *) rec)->node); + appendStringInfo(buf, "vacuum leaf tuples on root page"); + break; + case XLOG_SPGIST_VACUUM_REDIRECT: + 
out_target(buf, ((spgxlogVacuumRedirect *) rec)->node); + appendStringInfo(buf, "vacuum redirect tuples on page %u", + ((spgxlogVacuumRedirect *) rec)->blkno); + break; + default: + appendStringInfo(buf, "unknown spgist op code %u", info); + break; + } +} + +void +spg_xlog_startup(void) +{ + opCtx = AllocSetContextCreate(CurrentMemoryContext, + "SP-GiST temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); +} + +void +spg_xlog_cleanup(void) +{ + MemoryContextDelete(opCtx); + opCtx = NULL; +} diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index 6a0a2d9b477..ed8754e6f22 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -14,6 +14,7 @@ #include "access/heapam.h" #include "access/multixact.h" #include "access/nbtree.h" +#include "access/spgist.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "catalog/storage.h" @@ -40,5 +41,6 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = { {"Hash", hash_redo, hash_desc, NULL, NULL, NULL}, {"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint}, {"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, NULL}, - {"Sequence", seq_redo, seq_desc, NULL, NULL, NULL} + {"Sequence", seq_redo, seq_desc, NULL, NULL, NULL}, + {"SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup, NULL} }; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index f5660b2c3cd..d06809e7675 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6555,6 +6555,26 @@ gistcostestimate(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +Datum +spgcostestimate(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); + List *indexQuals = (List *) PG_GETARG_POINTER(2); + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); + + genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0, + indexStartupCost, indexTotalCost, + indexSelectivity, indexCorrelation); + + PG_RETURN_VOID(); +} + /* Find the index column matching "op"; return its index, or -1 if no match */ static int find_index_column(Node *op, IndexOptInfo *index) diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 290f0edaefa..ee5d71e4d71 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -24,6 +24,10 @@ * Note: GIN does not include a page ID word as do the other index types. * This is OK because the opaque data is only 8 bytes and so can be reliably * distinguished by size. Revisit this if the size ever increases. + * Further note: as of 9.2, SP-GiST also uses 8-byte special space. This is + * still OK, as long as GIN isn't using all of the high-order bits in its + * flags word, because that way the flags word cannot match the page ID used + * by SP-GiST. 
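 *
 * For instance, an inspection tool that has to tell the two apart could
 * read the last two bytes of the block and compare them with
 * SPGIST_PAGE_ID (0xFF82, defined in spgist_private.h); a sketch, under
 * the assumption that "page" points at a full BLCKSZ-sized block:
 *
 *      static bool
 *      page_is_spgist(Page page)
 *      {
 *          uint16      page_id;
 *
 *          memcpy(&page_id, (char *) page + BLCKSZ - sizeof(uint16),
 *                 sizeof(uint16));
 *          return page_id == SPGIST_PAGE_ID;
 *      }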
*/ typedef struct GinPageOpaqueData { diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index 14f50345bbf..10b2f9ea4db 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -42,8 +42,9 @@ typedef enum relopt_kind RELOPT_KIND_GIST = (1 << 5), RELOPT_KIND_ATTRIBUTE = (1 << 6), RELOPT_KIND_TABLESPACE = (1 << 7), + RELOPT_KIND_SPGIST = (1 << 8), /* if you add a new kind, make sure you update "last_default" too */ - RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_TABLESPACE, + RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_SPGIST, /* some compilers treat enums as signed ints, so we can't use 1 << 31 */ RELOPT_KIND_MAX = (1 << 30) } relopt_kind; diff --git a/src/include/access/rmgr.h b/src/include/access/rmgr.h index 83abba359a5..e4844fe96c9 100644 --- a/src/include/access/rmgr.h +++ b/src/include/access/rmgr.h @@ -32,6 +32,8 @@ typedef uint8 RmgrId; #define RM_GIN_ID 13 #define RM_GIST_ID 14 #define RM_SEQ_ID 15 -#define RM_MAX_ID RM_SEQ_ID +#define RM_SPGIST_ID 16 + +#define RM_MAX_ID RM_SPGIST_ID #endif /* RMGR_H */ diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h new file mode 100644 index 00000000000..aa655a31402 --- /dev/null +++ b/src/include/access/spgist.h @@ -0,0 +1,199 @@ +/*------------------------------------------------------------------------- + * + * spgist.h + * Public header file for SP-GiST access method. + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/spgist.h + * + *------------------------------------------------------------------------- + */ +#ifndef SPGIST_H +#define SPGIST_H + +#include "access/skey.h" +#include "access/xlog.h" +#include "fmgr.h" + + +/* reloption parameters */ +#define SPGIST_MIN_FILLFACTOR 10 +#define SPGIST_DEFAULT_FILLFACTOR 80 + +/* SPGiST opclass support function numbers */ +#define SPGIST_CONFIG_PROC 1 +#define SPGIST_CHOOSE_PROC 2 +#define SPGIST_PICKSPLIT_PROC 3 +#define SPGIST_INNER_CONSISTENT_PROC 4 +#define SPGIST_LEAF_CONSISTENT_PROC 5 +#define SPGISTNProc 5 + +/* + * Argument structs for spg_config method + */ +typedef struct spgConfigIn +{ + Oid attType; /* Data type to be indexed */ +} spgConfigIn; + +typedef struct spgConfigOut +{ + Oid prefixType; /* Data type of inner-tuple prefixes */ + Oid labelType; /* Data type of inner-tuple node labels */ + bool longValuesOK; /* Opclass can cope with values > 1 page */ +} spgConfigOut; + +/* + * Argument structs for spg_choose method + */ +typedef struct spgChooseIn +{ + Datum datum; /* original datum to be indexed */ + Datum leafDatum; /* current datum to be stored at leaf */ + int level; /* current level (counting from zero) */ + + /* Data from current inner tuple */ + bool allTheSame; /* tuple is marked all-the-same? */ + bool hasPrefix; /* tuple has a prefix? 
*/ + Datum prefixDatum; /* if so, the prefix value */ + int nNodes; /* number of nodes in the inner tuple */ + Datum *nodeLabels; /* node label values (NULL if none) */ +} spgChooseIn; + +typedef enum spgChooseResultType +{ + spgMatchNode = 1, /* descend into existing node */ + spgAddNode, /* add a node to the inner tuple */ + spgSplitTuple /* split inner tuple (change its prefix) */ +} spgChooseResultType; + +typedef struct spgChooseOut +{ + spgChooseResultType resultType; /* action code, see above */ + union + { + struct /* results for spgMatchNode */ + { + int nodeN; /* descend to this node (index from 0) */ + int levelAdd; /* increment level by this much */ + Datum restDatum; /* new leaf datum */ + } matchNode; + struct /* results for spgAddNode */ + { + Datum nodeLabel; /* new node's label */ + int nodeN; /* where to insert it (index from 0) */ + } addNode; + struct /* results for spgSplitTuple */ + { + /* Info to form new inner tuple with one node */ + bool prefixHasPrefix; /* tuple should have a prefix? */ + Datum prefixPrefixDatum; /* if so, its value */ + Datum nodeLabel; /* node's label */ + + /* Info to form new lower-level inner tuple with all old nodes */ + bool postfixHasPrefix; /* tuple should have a prefix? */ + Datum postfixPrefixDatum; /* if so, its value */ + } splitTuple; + } result; +} spgChooseOut; + +/* + * Argument structs for spg_picksplit method + */ +typedef struct spgPickSplitIn +{ + int nTuples; /* number of leaf tuples */ + Datum *datums; /* their datums (array of length nTuples) */ + int level; /* current level (counting from zero) */ +} spgPickSplitIn; + +typedef struct spgPickSplitOut +{ + bool hasPrefix; /* new inner tuple should have a prefix? */ + Datum prefixDatum; /* if so, its value */ + + int nNodes; /* number of nodes for new inner tuple */ + Datum *nodeLabels; /* their labels (or NULL for no labels) */ + + int *mapTuplesToNodes; /* node index for each leaf tuple */ + Datum *leafTupleDatums; /* datum to store in each new leaf tuple */ +} spgPickSplitOut; + +/* + * Argument structs for spg_inner_consistent method + */ +typedef struct spgInnerConsistentIn +{ + StrategyNumber strategy; /* operator strategy number */ + Datum query; /* operator's RHS value */ + + Datum reconstructedValue; /* value reconstructed at parent */ + int level; /* current level (counting from zero) */ + + /* Data from current inner tuple */ + bool allTheSame; /* tuple is marked all-the-same? */ + bool hasPrefix; /* tuple has a prefix? 
*/ + Datum prefixDatum; /* if so, the prefix value */ + int nNodes; /* number of nodes in the inner tuple */ + Datum *nodeLabels; /* node label values (NULL if none) */ +} spgInnerConsistentIn; + +typedef struct spgInnerConsistentOut +{ + int nNodes; /* number of child nodes to be visited */ + int *nodeNumbers; /* their indexes in the node array */ + int *levelAdds; /* increment level by this much for each */ + Datum *reconstructedValues; /* associated reconstructed values */ +} spgInnerConsistentOut; + +/* + * Argument structs for spg_leaf_consistent method + */ +typedef struct spgLeafConsistentIn +{ + StrategyNumber strategy; /* operator strategy number */ + Datum query; /* operator's RHS value */ + + Datum reconstructedValue; /* value reconstructed at parent */ + int level; /* current level (counting from zero) */ + + Datum leafDatum; /* datum in leaf tuple */ +} spgLeafConsistentIn; + +typedef struct spgLeafConsistentOut +{ + bool recheck; /* set true if operator must be rechecked */ +} spgLeafConsistentOut; + + +/* spginsert.c */ +extern Datum spgbuild(PG_FUNCTION_ARGS); +extern Datum spgbuildempty(PG_FUNCTION_ARGS); +extern Datum spginsert(PG_FUNCTION_ARGS); + +/* spgscan.c */ +extern Datum spgbeginscan(PG_FUNCTION_ARGS); +extern Datum spgendscan(PG_FUNCTION_ARGS); +extern Datum spgrescan(PG_FUNCTION_ARGS); +extern Datum spgmarkpos(PG_FUNCTION_ARGS); +extern Datum spgrestrpos(PG_FUNCTION_ARGS); +extern Datum spggetbitmap(PG_FUNCTION_ARGS); +extern Datum spggettuple(PG_FUNCTION_ARGS); + +/* spgutils.c */ +extern Datum spgoptions(PG_FUNCTION_ARGS); + +/* spgvacuum.c */ +extern Datum spgbulkdelete(PG_FUNCTION_ARGS); +extern Datum spgvacuumcleanup(PG_FUNCTION_ARGS); + +/* spgxlog.c */ +extern void spg_redo(XLogRecPtr lsn, XLogRecord *record); +extern void spg_desc(StringInfo buf, uint8 xl_info, char *rec); +extern void spg_xlog_startup(void); +extern void spg_xlog_cleanup(void); + +#endif /* SPGIST_H */ diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h new file mode 100644 index 00000000000..5c57799f09c --- /dev/null +++ b/src/include/access/spgist_private.h @@ -0,0 +1,609 @@ +/*------------------------------------------------------------------------- + * + * spgist_private.h + * Private declarations for SP-GiST access method. + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/spgist_private.h + * + *------------------------------------------------------------------------- + */ +#ifndef SPGIST_PRIVATE_H +#define SPGIST_PRIVATE_H + +#include "access/itup.h" +#include "access/spgist.h" +#include "nodes/tidbitmap.h" +#include "utils/rel.h" + + +/* Page numbers of fixed-location pages */ +#define SPGIST_METAPAGE_BLKNO (0) +#define SPGIST_HEAD_BLKNO (1) + +/* + * Contents of page special space on SPGiST index pages + */ +typedef struct SpGistPageOpaqueData +{ + uint16 flags; /* see bit definitions below */ + uint16 nRedirection; /* number of redirection tuples on page */ + uint16 nPlaceholder; /* number of placeholder tuples on page */ + /* note there's no count of either LIVE or DEAD tuples ... 
*/ + uint16 spgist_page_id; /* for identification of SP-GiST indexes */ +} SpGistPageOpaqueData; + +typedef SpGistPageOpaqueData *SpGistPageOpaque; + +/* Flag bits in page special space */ +#define SPGIST_META (1<<0) +#define SPGIST_DELETED (1<<1) +#define SPGIST_LEAF (1<<2) + +#define SpGistPageGetOpaque(page) ((SpGistPageOpaque) PageGetSpecialPointer(page)) +#define SpGistPageIsMeta(page) (SpGistPageGetOpaque(page)->flags & SPGIST_META) +#define SpGistPageIsDeleted(page) (SpGistPageGetOpaque(page)->flags & SPGIST_DELETED) +#define SpGistPageSetDeleted(page) (SpGistPageGetOpaque(page)->flags |= SPGIST_DELETED) +#define SpGistPageSetNonDeleted(page) (SpGistPageGetOpaque(page)->flags &= ~SPGIST_DELETED) +#define SpGistPageIsLeaf(page) (SpGistPageGetOpaque(page)->flags & SPGIST_LEAF) +#define SpGistPageSetLeaf(page) (SpGistPageGetOpaque(page)->flags |= SPGIST_LEAF) +#define SpGistPageSetInner(page) (SpGistPageGetOpaque(page)->flags &= ~SPGIST_LEAF) + +/* + * The page ID is for the convenience of pg_filedump and similar utilities, + * which otherwise would have a hard time telling pages of different index + * types apart. It should be the last 2 bytes on the page. This is more or + * less "free" due to alignment considerations. + */ +#define SPGIST_PAGE_ID 0xFF82 + +/* + * Each backend keeps a cache of last-used page info in its index->rd_amcache + * area. This is initialized from, and occasionally written back to, + * shared storage in the index metapage. + */ +typedef struct SpGistLastUsedPage +{ + BlockNumber blkno; /* block number of described page */ + int freeSpace; /* its free space (could be obsolete!) */ +} SpGistLastUsedPage; + +typedef struct SpGistCache +{ + SpGistLastUsedPage innerPage[3]; /* one per triple-parity group */ + SpGistLastUsedPage leafPage; +} SpGistCache; + +/* + * metapage + */ +typedef struct SpGistMetaPageData +{ + uint32 magicNumber; /* for identity cross-check */ + SpGistCache lastUsedPages; /* shared storage of last-used info */ +} SpGistMetaPageData; + +#define SPGIST_MAGIC_NUMBER (0xBA0BABED) + +#define SpGistPageGetMeta(p) \ + ((SpGistMetaPageData *) PageGetContents(p)) + +/* + * Private state of index AM. SpGistState is common to both insert and + * search code; SpGistScanOpaque is for searches only. 
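 *
 * As a rough usage sketch (heapPtr and datum stand for the heap TID and
 * the value being indexed), insert-side callers set up an SpGistState
 * once per operation and hand it to the work-horse routines declared at
 * the bottom of this file:
 *
 *      SpGistState state;
 *
 *      initSpGistState(&state, index);
 *      spgdoinsert(index, &state, heapPtr, datum);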
+ */ + +/* Per-datatype info needed in SpGistState */ +typedef struct SpGistTypeDesc +{ + Oid type; + bool attbyval; + int16 attlen; +} SpGistTypeDesc; + +typedef struct SpGistState +{ + spgConfigOut config; /* filled in by opclass config method */ + + SpGistTypeDesc attType; /* type of input data and leaf values */ + SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */ + SpGistTypeDesc attLabelType; /* type of node label values */ + + /* lookup data for the opclass support functions, except config */ + FmgrInfo chooseFn; + FmgrInfo picksplitFn; + FmgrInfo innerConsistentFn; + FmgrInfo leafConsistentFn; + + char *deadTupleStorage; /* workspace for spgFormDeadTuple */ + + TransactionId myXid; /* XID to use when creating a redirect tuple */ + bool isBuild; /* true if doing index build */ +} SpGistState; + +/* + * Private state of an index scan + */ +typedef struct SpGistScanOpaqueData +{ + SpGistState state; /* see above */ + MemoryContext tempCxt; /* short-lived memory context */ + + /* Index quals for scan (copied from IndexScanDesc for convenience) */ + int numberOfKeys; /* number of index qualifier conditions */ + ScanKey keyData; /* array of index qualifier descriptors */ + + /* Stack of yet-to-be-visited pages */ + List *scanStack; /* List of ScanStackEntrys */ + + /* These fields are only used in amgetbitmap scans: */ + TIDBitmap *tbm; /* bitmap being filled */ + int64 ntids; /* number of TIDs passed to bitmap */ + + /* These fields are only used in amgettuple scans: */ + int nPtrs; /* number of TIDs found on current page */ + int iPtr; /* index for scanning through same */ + ItemPointerData heapPtrs[MaxIndexTuplesPerPage]; /* TIDs from cur page */ + bool recheck[MaxIndexTuplesPerPage]; /* their recheck flags */ + + /* + * Note: using MaxIndexTuplesPerPage above is a bit hokey since + * SpGistLeafTuples aren't exactly IndexTuples; however, they are + * larger, so this is safe. + */ +} SpGistScanOpaqueData; + +typedef SpGistScanOpaqueData *SpGistScanOpaque; + + +/* + * SPGiST tuple types. Note: inner, leaf, and dead tuple structs + * must have the same tupstate field in the same position! Real inner and + * leaf tuples always have tupstate = LIVE; if the state is something else, + * use the SpGistDeadTuple struct to inspect the tuple. 
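 *
 * In other words, code looping over a page's item offsets can always cast
 * through SpGistDeadTuple first to check the state, as the vacuum and WAL
 * replay code does; for example, inside such a loop:
 *
 *      SpGistDeadTuple dt = (SpGistDeadTuple) PageGetItem(page,
 *                                      PageGetItemId(page, offnum));
 *
 *      if (dt->tupstate != SPGIST_LIVE)
 *          continue;           /* REDIRECT, DEAD, or PLACEHOLDER */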
+ */ + +/* values of tupstate (see README for more info) */ +#define SPGIST_LIVE 0 /* normal live tuple (either inner or leaf) */ +#define SPGIST_REDIRECT 1 /* temporary redirection placeholder */ +#define SPGIST_DEAD 2 /* dead, cannot be removed because of links */ +#define SPGIST_PLACEHOLDER 3 /* placeholder, used to preserve offsets */ + +/* + * SPGiST inner tuple: list of "nodes" that subdivide a set of tuples + * + * Inner tuple layout: + * header/optional prefix/array of nodes, which are SpGistNodeTuples + * + * size and prefixSize must be multiples of MAXALIGN + */ +typedef struct SpGistInnerTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + allTheSame:1, /* all nodes in tuple are equivalent */ + nNodes:13, /* number of nodes within inner tuple */ + prefixSize:16; /* size of prefix, or 0 if none */ + uint16 size; /* total size of inner tuple */ + /* On most machines there will be a couple of wasted bytes here */ + /* prefix datum follows, then nodes */ +} SpGistInnerTupleData; + +typedef SpGistInnerTupleData *SpGistInnerTuple; + +/* these must match largest values that fit in bit fields declared above */ +#define SGITMAXNNODES 0x1FFF +#define SGITMAXPREFIXSIZE 0xFFFF +#define SGITMAXSIZE 0xFFFF + +#define SGITHDRSZ MAXALIGN(sizeof(SpGistInnerTupleData)) +#define _SGITDATA(x) (((char *) (x)) + SGITHDRSZ) +#define SGITDATAPTR(x) ((x)->prefixSize ? _SGITDATA(x) : NULL) +#define SGITDATUM(x, s) ((x)->prefixSize ? \ + ((s)->attPrefixType.attbyval ? \ + *(Datum *) _SGITDATA(x) : \ + PointerGetDatum(_SGITDATA(x))) \ + : (Datum) 0) +#define SGITNODEPTR(x) ((SpGistNodeTuple) (_SGITDATA(x) + (x)->prefixSize)) + +/* Macro for iterating through the nodes of an inner tuple */ +#define SGITITERATE(x, i, nt) \ + for ((i) = 0, (nt) = SGITNODEPTR(x); \ + (i) < (x)->nNodes; \ + (i)++, (nt) = (SpGistNodeTuple) (((char *) (nt)) + IndexTupleSize(nt))) + +/* + * SPGiST node tuple: one node within an inner tuple + * + * Node tuples use the same header as ordinary Postgres IndexTuples, but + * we do not use a null bitmap, because we know there is only one column + * so the INDEX_NULL_MASK bit suffices. Also, pass-by-value datums are + * stored as a full Datum, the same convention as for inner tuple prefixes + * and leaf tuple datums. + */ + +typedef IndexTupleData SpGistNodeTupleData; + +typedef SpGistNodeTupleData *SpGistNodeTuple; + +#define SGNTHDRSZ MAXALIGN(sizeof(SpGistNodeTupleData)) +#define SGNTDATAPTR(x) (((char *) (x)) + SGNTHDRSZ) +#define SGNTDATUM(x, s) ((s)->attLabelType.attbyval ? \ + *(Datum *) SGNTDATAPTR(x) : \ + PointerGetDatum(SGNTDATAPTR(x))) + +/* + * SPGiST leaf tuple: carries a datum and a heap tuple TID + * + * In the simplest case, the datum is the same as the indexed value; but + * it could also be a suffix or some other sort of delta that permits + * reconstruction given knowledge of the prefix path traversed to get here. + * + * The size field is wider than could possibly be needed for an on-disk leaf + * tuple, but this allows us to form leaf tuples even when the datum is too + * wide to be stored immediately, and it costs nothing because of alignment + * considerations. + * + * Normally, nextOffset links to the next tuple belonging to the same parent + * node (which must be on the same page). But when the root page is a leaf + * page, we don't chain its tuples, so nextOffset is always 0 on the root. 
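 *
 * A minimal sketch of walking one such chain on a non-root leaf page,
 * assuming "offnum" starts at the chain's head tuple as recorded in the
 * parent node's downlink (process_leaf is a hypothetical per-tuple step):
 *
 *      while (offnum != InvalidOffsetNumber)
 *      {
 *          SpGistLeafTuple lt = (SpGistLeafTuple) PageGetItem(page,
 *                                      PageGetItemId(page, offnum));
 *
 *          process_leaf(lt);
 *          offnum = lt->nextOffset;
 *      }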
+ * + * size must be a multiple of MAXALIGN + */ +typedef struct SpGistLeafTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + size:30; /* large enough for any palloc'able value */ + OffsetNumber nextOffset; /* next tuple in chain, or InvalidOffset */ + ItemPointerData heapPtr; /* TID of represented heap tuple */ + /* leaf datum follows */ +} SpGistLeafTupleData; + +typedef SpGistLeafTupleData *SpGistLeafTuple; + +#define SGLTHDRSZ MAXALIGN(sizeof(SpGistLeafTupleData)) +#define SGLTDATAPTR(x) (((char *) (x)) + SGLTHDRSZ) +#define SGLTDATUM(x, s) ((s)->attType.attbyval ? \ + *(Datum *) SGLTDATAPTR(x) : \ + PointerGetDatum(SGLTDATAPTR(x))) + +/* + * SPGiST dead tuple: declaration for examining non-live tuples + * + * The tupstate field of this struct must match those of regular inner and + * leaf tuples, and its size field must match a leaf tuple's. + * Also, the pointer field must be in the same place as a leaf tuple's heapPtr + * field, to satisfy some Asserts that we make when replacing a leaf tuple + * with a dead tuple. + * We don't use nextOffset, but it's needed to align the pointer field. + * pointer and xid are only valid when tupstate = REDIRECT. + */ +typedef struct SpGistDeadTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + size:30; + OffsetNumber nextOffset; /* not used in dead tuples */ + ItemPointerData pointer; /* redirection inside index */ + TransactionId xid; /* ID of xact that inserted this tuple */ +} SpGistDeadTupleData; + +typedef SpGistDeadTupleData *SpGistDeadTuple; + +#define SGDTSIZE MAXALIGN(sizeof(SpGistDeadTupleData)) + +/* + * Macros for doing free-space calculations. Note that when adding up the + * space needed for tuples, we always consider each tuple to need the tuple's + * size plus sizeof(ItemIdData) (for the line pointer). This works correctly + * so long as tuple sizes are always maxaligned. 
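 *
 * So the typical "will this tuple fit?" test is expected to look about
 * like this (sketch only; leafTuple is whatever tuple is being placed):
 *
 *      Size        needed = leafTuple->size + sizeof(ItemIdData);
 *      bool        fits = SpGistPageGetFreeSpace(page, 1) >= needed;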
+ */ + +/* Page capacity after allowing for fixed header and special space */ +#define SPGIST_PAGE_CAPACITY \ + MAXALIGN_DOWN(BLCKSZ - \ + SizeOfPageHeaderData - \ + MAXALIGN(sizeof(SpGistPageOpaqueData))) + +/* + * Compute free space on page, assuming that up to n placeholders can be + * recycled if present (n should be the number of tuples to be inserted) + */ +#define SpGistPageGetFreeSpace(p, n) \ + (PageGetExactFreeSpace(p) + \ + Min(SpGistPageGetOpaque(p)->nPlaceholder, n) * \ + (SGDTSIZE + sizeof(ItemIdData))) + +/* + * XLOG stuff + * + * ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof + */ + +#define ACCEPT_RDATA_DATA(p, s, i) \ + do { \ + Assert((i) < lengthof(rdata)); \ + rdata[i].data = (char *) (p); \ + rdata[i].len = (s); \ + rdata[i].buffer = InvalidBuffer; \ + rdata[i].buffer_std = true; \ + rdata[i].next = NULL; \ + if ((i) > 0) \ + rdata[(i) - 1].next = rdata + (i); \ + } while(0) + +#define ACCEPT_RDATA_BUFFER(b, i) \ + do { \ + Assert((i) < lengthof(rdata)); \ + rdata[i].data = NULL; \ + rdata[i].len = 0; \ + rdata[i].buffer = (b); \ + rdata[i].buffer_std = true; \ + rdata[i].next = NULL; \ + if ((i) > 0) \ + rdata[(i) - 1].next = rdata + (i); \ + } while(0) + + +/* XLOG record types for SPGiST */ +#define XLOG_SPGIST_CREATE_INDEX 0x00 +#define XLOG_SPGIST_ADD_LEAF 0x10 +#define XLOG_SPGIST_MOVE_LEAFS 0x20 +#define XLOG_SPGIST_ADD_NODE 0x30 +#define XLOG_SPGIST_SPLIT_TUPLE 0x40 +#define XLOG_SPGIST_PICKSPLIT 0x50 +#define XLOG_SPGIST_VACUUM_LEAF 0x60 +#define XLOG_SPGIST_VACUUM_ROOT 0x70 +#define XLOG_SPGIST_VACUUM_REDIRECT 0x80 + +/* + * Some redo functions need an SpGistState, although only a few of its fields + * need to be valid. spgxlogState carries the required info in xlog records. + * (See fillFakeState in spgxlog.c for more comments.) + */ +typedef struct spgxlogState +{ + TransactionId myXid; + bool isBuild; +} spgxlogState; + +#define STORE_STATE(s, d) \ + do { \ + (d).myXid = (s)->myXid; \ + (d).isBuild = (s)->isBuild; \ + } while(0) + + +typedef struct spgxlogAddLeaf +{ + RelFileNode node; + + BlockNumber blknoLeaf; /* destination page for leaf tuple */ + bool newPage; /* init dest page? */ + OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */ + OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */ + + BlockNumber blknoParent; /* where the parent downlink is, if any */ + OffsetNumber offnumParent; + uint16 nodeI; + + /* + * new leaf tuple follows, on an intalign boundary (replay only needs to + * fetch its size field, so that should be enough alignment) + */ +} spgxlogAddLeaf; + +typedef struct spgxlogMoveLeafs +{ + RelFileNode node; + + BlockNumber blknoSrc; /* source leaf page */ + BlockNumber blknoDst; /* destination leaf page */ + uint16 nMoves; /* number of tuples moved from source page */ + bool newPage; /* init dest page? */ + bool replaceDead; /* are we replacing a DEAD source tuple? 
*/ + + BlockNumber blknoParent; /* where the parent downlink is */ + OffsetNumber offnumParent; + uint16 nodeI; + + spgxlogState stateSrc; + + /*---------- + * data follows: + * array of deleted tuple numbers, length nMoves + * array of inserted tuple numbers, length nMoves + 1 or 1 + * list of leaf tuples, length nMoves + 1 or 1 (must be maxaligned) + * the tuple number arrays are padded to maxalign boundaries so that the + * leaf tuples will be suitably aligned + * + * Note: if replaceDead is true then there is only one inserted tuple + * number and only one leaf tuple in the data, because we are not copying + * the dead tuple from the source + * + * Buffer references in the rdata array are: + * Src page + * Dest page + * Parent page + *---------- + */ +} spgxlogMoveLeafs; + +typedef struct spgxlogAddNode +{ + RelFileNode node; + + BlockNumber blkno; /* block number of original inner tuple */ + OffsetNumber offnum; /* offset of original inner tuple */ + + BlockNumber blknoParent; /* where parent downlink is, if updated */ + OffsetNumber offnumParent; + uint16 nodeI; + + BlockNumber blknoNew; /* where new tuple goes, if not same place */ + OffsetNumber offnumNew; + bool newPage; /* init new page? */ + + spgxlogState stateSrc; + + /* + * updated inner tuple follows, on an intalign boundary (replay only needs + * to fetch its size field, so that should be enough alignment) + */ +} spgxlogAddNode; + +typedef struct spgxlogSplitTuple +{ + RelFileNode node; + + BlockNumber blknoPrefix; /* where the prefix tuple goes */ + OffsetNumber offnumPrefix; + + BlockNumber blknoPostfix; /* where the postfix tuple goes */ + OffsetNumber offnumPostfix; + bool newPage; /* need to init that page? */ + + /* + * new prefix inner tuple follows, then new postfix inner tuple, on + * intalign boundaries (replay only needs to fetch size fields, so that + * should be enough alignment) + */ +} spgxlogSplitTuple; + +typedef struct spgxlogPickSplit +{ + RelFileNode node; + + BlockNumber blknoSrc; /* original leaf page */ + BlockNumber blknoDest; /* other leaf page, if any */ + uint16 nDelete; /* n to delete from Src */ + uint16 nInsert; /* n to insert on Src and/or Dest */ + bool initSrc; /* re-init the Src page? */ + bool initDest; /* re-init the Dest page? */ + + BlockNumber blknoInner; /* where to put new inner tuple */ + OffsetNumber offnumInner; + bool initInner; /* re-init the Inner page? 
*/ + + BlockNumber blknoParent; /* where the parent downlink is, if any */ + OffsetNumber offnumParent; + uint16 nodeI; + + spgxlogState stateSrc; + + /*---------- + * data follows: + * new inner tuple (assumed to have a maxaligned length) + * array of deleted tuple numbers, length nDelete + * array of inserted tuple numbers, length nInsert + * array of page selector bytes for inserted tuples, length nInsert + * list of leaf tuples, length nInsert (must be maxaligned) + * the tuple number and page selector arrays are padded to maxalign + * boundaries so that the leaf tuples will be suitably aligned + * + * Buffer references in the rdata array are: + * Src page (only if not root and not being init'd) + * Dest page (if used and not being init'd) + * Inner page (only if not being init'd) + * Parent page (if any; could be same as Inner) + *---------- + */ +} spgxlogPickSplit; + +typedef struct spgxlogVacuumLeaf +{ + RelFileNode node; + + BlockNumber blkno; /* block number to clean */ + uint16 nDead; /* number of tuples to become DEAD */ + uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */ + uint16 nMove; /* number of tuples to move */ + uint16 nChain; /* number of tuples to re-chain */ + + spgxlogState stateSrc; + + /*---------- + * data follows: + * tuple numbers to become DEAD + * tuple numbers to become PLACEHOLDER + * tuple numbers to move from (and replace with PLACEHOLDER) + * tuple numbers to move to (replacing what is there) + * tuple numbers to update nextOffset links of + * tuple numbers to insert in nextOffset links + *---------- + */ +} spgxlogVacuumLeaf; + +typedef struct spgxlogVacuumRoot +{ + /* vacuum root page when it is a leaf */ + RelFileNode node; + + uint16 nDelete; /* number of tuples to delete */ + + spgxlogState stateSrc; + + /* offsets of tuples to delete follow */ +} spgxlogVacuumRoot; + +typedef struct spgxlogVacuumRedirect +{ + RelFileNode node; + + BlockNumber blkno; /* block number to clean */ + uint16 nToPlaceholder; /* number of redirects to make placeholders */ + OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */ + + /* offsets of redirect tuples to make placeholders follow */ +} spgxlogVacuumRedirect; + +/* + * The "flags" argument for SpGistGetBuffer should be either GBUF_LEAF to + * get a leaf page, or GBUF_INNER_PARITY(blockNumber) to get an inner + * page in the same triple-parity group as the specified block number. + * (Typically, this should be GBUF_INNER_PARITY(parentBlockNumber + 1) + * to follow the rule described in spgist/README.) 
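 *
 * For example, a caller that needs room for one new inner tuple below a
 * parent located on block "parentBlkno" might do (sketch only; the
 * variable names are illustrative):
 *
 *      bool        isNew;
 *      Buffer      buf = SpGistGetBuffer(index,
 *                                        GBUF_INNER_PARITY(parentBlkno + 1),
 *                                        innerTuple->size + sizeof(ItemIdData),
 *                                        &isNew);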
+ */ +#define GBUF_PARITY_MASK 0x03 +#define GBUF_LEAF 0x04 +#define GBUF_INNER_PARITY(x) ((x) % 3) + +/* spgutils.c */ +extern void initSpGistState(SpGistState *state, Relation index); +extern Buffer SpGistNewBuffer(Relation index); +extern void SpGistUpdateMetaPage(Relation index); +extern Buffer SpGistGetBuffer(Relation index, int flags, + int needSpace, bool *isNew); +extern void SpGistSetLastUsedPage(Relation index, Buffer buffer); +extern void SpGistInitPage(Page page, uint16 f); +extern void SpGistInitBuffer(Buffer b, uint16 f); +extern void SpGistInitMetapage(Page page); +extern unsigned int SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum); +extern SpGistLeafTuple spgFormLeafTuple(SpGistState *state, + ItemPointer heapPtr, Datum datum); +extern SpGistNodeTuple spgFormNodeTuple(SpGistState *state, + Datum label, bool isnull); +extern SpGistInnerTuple spgFormInnerTuple(SpGistState *state, + bool hasPrefix, Datum prefix, + int nNodes, SpGistNodeTuple *nodes); +extern SpGistDeadTuple spgFormDeadTuple(SpGistState *state, int tupstate, + BlockNumber blkno, OffsetNumber offnum); +extern Datum *spgExtractNodeLabels(SpGistState *state, + SpGistInnerTuple innerTuple); +extern OffsetNumber SpGistPageAddNewItem(SpGistState *state, Page page, + Item item, Size size, + OffsetNumber *startOffset, + bool errorOK); + +/* spgdoinsert.c */ +extern void updateNodeLink(SpGistInnerTuple tup, int nodeN, + BlockNumber blkno, OffsetNumber offset); +extern void spgPageIndexMultiDelete(SpGistState *state, Page page, + OffsetNumber *itemnos, int nitems, + int firststate, int reststate, + BlockNumber blkno, OffsetNumber offnum); +extern void spgdoinsert(Relation index, SpGistState *state, + ItemPointer heapPtr, Datum datum); + +#endif /* SPGIST_PRIVATE_H */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 14e177dc482..eb343545915 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201112071 +#define CATALOG_VERSION_NO 201112171 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index ddacdf274c4..6fdd1d5b052 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -117,17 +117,20 @@ typedef FormData_pg_am *Form_pg_am; * ---------------- */ -DATA(insert OID = 403 ( btree 5 2 t f t t t t t t t f t t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbuildempty btbulkdelete btvacuumcleanup btcostestimate btoptions )); +DATA(insert OID = 403 ( btree 5 2 t f t t t t t t t f t t 0 btinsert btbeginscan btgettuple btgetbitmap btrescan btendscan btmarkpos btrestrpos btbuild btbuildempty btbulkdelete btvacuumcleanup btcostestimate btoptions )); DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); +DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist 0 8 f t f f t f t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan 
gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); +DATA(insert OID = 783 ( gist 0 8 f t f f t f t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 -DATA(insert OID = 2742 ( gin 0 5 f f f f t f t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); +DATA(insert OID = 2742 ( gin 0 5 f f f f t f t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 +DATA(insert OID = 4000 ( spgist 0 5 f f f f f f f f f f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcostestimate spgoptions )); +DESCR("SP-GiST index access method"); +#define SPGIST_AM_OID 4000 #endif /* PG_AM_H */ diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h index 1e8c9a289f9..cb394e03e40 100644 --- a/src/include/catalog/pg_amop.h +++ b/src/include/catalog/pg_amop.h @@ -737,4 +737,37 @@ DATA(insert ( 3919 3831 3831 8 s 3892 783 0 )); DATA(insert ( 3919 3831 2283 16 s 3889 783 0 )); DATA(insert ( 3919 3831 3831 18 s 3882 783 0 )); +/* + * SP-GiST quad_point_ops + */ +DATA(insert ( 4015 600 600 11 s 506 4000 0 )); +DATA(insert ( 4015 600 600 1 s 507 4000 0 )); +DATA(insert ( 4015 600 600 5 s 508 4000 0 )); +DATA(insert ( 4015 600 600 10 s 509 4000 0 )); +DATA(insert ( 4015 600 600 6 s 510 4000 0 )); +DATA(insert ( 4015 600 603 8 s 511 4000 0 )); + +/* + * SP-GiST kd_point_ops + */ +DATA(insert ( 4016 600 600 11 s 506 4000 0 )); +DATA(insert ( 4016 600 600 1 s 507 4000 0 )); +DATA(insert ( 4016 600 600 5 s 508 4000 0 )); +DATA(insert ( 4016 600 600 10 s 509 4000 0 )); +DATA(insert ( 4016 600 600 6 s 510 4000 0 )); +DATA(insert ( 4016 600 603 8 s 511 4000 0 )); + +/* + * SP-GiST text_ops + */ +DATA(insert ( 4017 25 25 1 s 2314 4000 0 )); +DATA(insert ( 4017 25 25 2 s 2315 4000 0 )); +DATA(insert ( 4017 25 25 3 s 98 4000 0 )); +DATA(insert ( 4017 25 25 4 s 2317 4000 0 )); +DATA(insert ( 4017 25 25 5 s 2318 4000 0 )); +DATA(insert ( 4017 25 25 11 s 664 4000 0 )); +DATA(insert ( 4017 25 25 12 s 665 4000 0 )); +DATA(insert ( 4017 25 25 14 s 667 4000 0 )); +DATA(insert ( 4017 25 25 15 s 666 4000 0 )); + #endif /* PG_AMOP_H */ diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 8571dd08709..a4c49efed83 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -356,4 +356,22 @@ DATA(insert ( 3919 3831 3831 5 3879 )); DATA(insert ( 3919 3831 3831 6 3880 )); DATA(insert ( 3919 3831 3831 7 3881 )); + +/* sp-gist */ +DATA(insert ( 4015 600 600 1 4018 )); +DATA(insert ( 4015 600 600 2 4019 )); +DATA(insert ( 4015 600 600 3 4020 )); +DATA(insert ( 4015 600 600 4 4021 )); +DATA(insert ( 4015 600 600 5 4022 )); +DATA(insert ( 4016 600 600 1 4023 )); +DATA(insert ( 4016 600 600 2 4024 )); +DATA(insert ( 4016 600 600 3 4025 )); +DATA(insert ( 4016 600 600 4 4026 )); +DATA(insert ( 4016 600 600 5 4022 )); +DATA(insert ( 4017 25 25 1 4027 )); +DATA(insert ( 4017 25 25 2 4028 )); +DATA(insert 
( 4017 25 25 3 4029 )); +DATA(insert ( 4017 25 25 4 4030 )); +DATA(insert ( 4017 25 25 5 4031 )); + #endif /* PG_AMPROC_H */ diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index eecd3b63c50..c692ae4311b 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -223,5 +223,8 @@ DATA(insert ( 783 tsquery_ops PGNSP PGUID 3702 3615 t 20 )); DATA(insert ( 403 range_ops PGNSP PGUID 3901 3831 t 0 )); DATA(insert ( 405 range_ops PGNSP PGUID 3903 3831 t 0 )); DATA(insert ( 783 range_ops PGNSP PGUID 3919 3831 t 0 )); +DATA(insert ( 4000 quad_point_ops PGNSP PGUID 4015 600 t 0 )); +DATA(insert ( 4000 kd_point_ops PGNSP PGUID 4016 600 f 0 )); +DATA(insert ( 4000 text_ops PGNSP PGUID 4017 25 t 0 )); #endif /* PG_OPCLASS_H */ diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index 5ea949bec6b..009000ffcff 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -142,5 +142,8 @@ DATA(insert OID = 3702 ( 783 tsquery_ops PGNSP PGUID )); DATA(insert OID = 3901 ( 403 range_ops PGNSP PGUID )); DATA(insert OID = 3903 ( 405 range_ops PGNSP PGUID )); DATA(insert OID = 3919 ( 783 range_ops PGNSP PGUID )); +DATA(insert OID = 4015 ( 4000 quad_point_ops PGNSP PGUID )); +DATA(insert OID = 4016 ( 4000 kd_point_ops PGNSP PGUID )); +DATA(insert OID = 4017 ( 4000 text_ops PGNSP PGUID )); #endif /* PG_OPFAMILY_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 924cb1f601c..6da3b421ae3 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4481,6 +4481,68 @@ DESCR("int8range constructor"); DATA(insert OID = 3946 ( int8range PGNSP PGUID 12 1 0 0 0 f f f f f i 3 0 3926 "20 20 25" _null_ _null_ _null_ _null_ range_constructor3 _null_ _null_ _null_ )); DESCR("int8range constructor"); +/* spgist support functions */ +DATA(insert OID = 4001 ( spggettuple PGNSP PGUID 12 1 0 0 0 f f f t f v 2 0 16 "2281 2281" _null_ _null_ _null_ _null_ spggettuple _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4002 ( spggetbitmap PGNSP PGUID 12 1 0 0 0 f f f t f v 2 0 20 "2281 2281" _null_ _null_ _null_ _null_ spggetbitmap _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4003 ( spginsert PGNSP PGUID 12 1 0 0 0 f f f t f v 6 0 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ spginsert _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4004 ( spgbeginscan PGNSP PGUID 12 1 0 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ spgbeginscan _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4005 ( spgrescan PGNSP PGUID 12 1 0 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ spgrescan _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4006 ( spgendscan PGNSP PGUID 12 1 0 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ spgendscan _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4007 ( spgmarkpos PGNSP PGUID 12 1 0 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ spgmarkpos _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4008 ( spgrestrpos PGNSP PGUID 12 1 0 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ spgrestrpos _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4009 ( spgbuild PGNSP PGUID 12 1 0 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ spgbuild _null_ _null_ 
_null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4010 ( spgbuildempty PGNSP PGUID 12 1 0 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ spgbuildempty _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4011 ( spgbulkdelete PGNSP PGUID 12 1 0 0 0 f f f t f v 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ spgbulkdelete _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4012 ( spgvacuumcleanup PGNSP PGUID 12 1 0 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ spgvacuumcleanup _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4013 ( spgcostestimate PGNSP PGUID 12 1 0 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ spgcostestimate _null_ _null_ _null_ )); +DESCR("spgist(internal)"); +DATA(insert OID = 4014 ( spgoptions PGNSP PGUID 12 1 0 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_ spgoptions _null_ _null_ _null_ )); +DESCR("spgist(internal)"); + +/* spgist opclasses */ +DATA(insert OID = 4018 ( spg_quad_config PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_quad_config _null_ _null_ _null_ )); +DESCR("SP-GiST support for quad tree over point"); +DATA(insert OID = 4019 ( spg_quad_choose PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_quad_choose _null_ _null_ _null_ )); +DESCR("SP-GiST support for quad tree over point"); +DATA(insert OID = 4020 ( spg_quad_picksplit PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_quad_picksplit _null_ _null_ _null_ )); +DESCR("SP-GiST support for quad tree over point"); +DATA(insert OID = 4021 ( spg_quad_inner_consistent PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_quad_inner_consistent _null_ _null_ _null_ )); +DESCR("SP-GiST support for quad tree over point"); +DATA(insert OID = 4022 ( spg_quad_leaf_consistent PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "2281 2281" _null_ _null_ _null_ _null_ spg_quad_leaf_consistent _null_ _null_ _null_ )); +DESCR("SP-GiST support for quad tree and k-d tree over point"); + +DATA(insert OID = 4023 ( spg_kd_config PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_kd_config _null_ _null_ _null_ )); +DESCR("SP-GiST support for k-d tree over point"); +DATA(insert OID = 4024 ( spg_kd_choose PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_kd_choose _null_ _null_ _null_ )); +DESCR("SP-GiST support for k-d tree over point"); +DATA(insert OID = 4025 ( spg_kd_picksplit PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_kd_picksplit _null_ _null_ _null_ )); +DESCR("SP-GiST support for k-d tree over point"); +DATA(insert OID = 4026 ( spg_kd_inner_consistent PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_kd_inner_consistent _null_ _null_ _null_ )); +DESCR("SP-GiST support for k-d tree over point"); + +DATA(insert OID = 4027 ( spg_text_config PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_text_config _null_ _null_ _null_ )); +DESCR("SP-GiST support for suffix tree over text"); +DATA(insert OID = 4028 ( spg_text_choose PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_text_choose _null_ _null_ _null_ )); +DESCR("SP-GiST support for suffix tree over text"); +DATA(insert OID = 
4029 ( spg_text_picksplit PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_text_picksplit _null_ _null_ _null_ )); +DESCR("SP-GiST support for suffix tree over text"); +DATA(insert OID = 4030 ( spg_text_inner_consistent PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ spg_text_inner_consistent _null_ _null_ _null_ )); +DESCR("SP-GiST support for suffix tree over text"); +DATA(insert OID = 4031 ( spg_text_leaf_consistent PGNSP PGUID 12 1 0 0 0 f f f t f i 2 0 16 "2281 2281" _null_ _null_ _null_ _null_ spg_text_leaf_consistent _null_ _null_ _null_ )); +DESCR("SP-GiST support for suffix tree over text"); + /* * Symbolic values for provolatile column: these indicate whether the result diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 994dc5368b1..9c5af5960fd 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -1080,6 +1080,26 @@ extern Datum window_first_value(PG_FUNCTION_ARGS); extern Datum window_last_value(PG_FUNCTION_ARGS); extern Datum window_nth_value(PG_FUNCTION_ARGS); +/* access/spgist/spgquadtreeproc.c */ +extern Datum spg_quad_config(PG_FUNCTION_ARGS); +extern Datum spg_quad_choose(PG_FUNCTION_ARGS); +extern Datum spg_quad_picksplit(PG_FUNCTION_ARGS); +extern Datum spg_quad_inner_consistent(PG_FUNCTION_ARGS); +extern Datum spg_quad_leaf_consistent(PG_FUNCTION_ARGS); + +/* access/spgist/spgkdtreeproc.c */ +extern Datum spg_kd_config(PG_FUNCTION_ARGS); +extern Datum spg_kd_choose(PG_FUNCTION_ARGS); +extern Datum spg_kd_picksplit(PG_FUNCTION_ARGS); +extern Datum spg_kd_inner_consistent(PG_FUNCTION_ARGS); + +/* access/spgist/spgtextproc.c */ +extern Datum spg_text_config(PG_FUNCTION_ARGS); +extern Datum spg_text_choose(PG_FUNCTION_ARGS); +extern Datum spg_text_picksplit(PG_FUNCTION_ARGS); +extern Datum spg_text_inner_consistent(PG_FUNCTION_ARGS); +extern Datum spg_text_leaf_consistent(PG_FUNCTION_ARGS); + /* access/gin/ginarrayproc.c */ extern Datum ginarrayextract(PG_FUNCTION_ARGS); extern Datum ginarrayextract_2args(PG_FUNCTION_ARGS); diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 32d14b60290..6afcbf47537 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -194,6 +194,7 @@ extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, extern Datum btcostestimate(PG_FUNCTION_ARGS); extern Datum hashcostestimate(PG_FUNCTION_ARGS); extern Datum gistcostestimate(PG_FUNCTION_ARGS); +extern Datum spgcostestimate(PG_FUNCTION_ARGS); extern Datum gincostestimate(PG_FUNCTION_ARGS); #endif /* SELFUNCS_H */ diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index bdd1f4ec78e..86cee2de942 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -61,6 +61,26 @@ CREATE TEMP TABLE gcircle_tbl AS SELECT circle(home_base) AS f1 FROM slow_emp4000; CREATE INDEX ggpolygonind ON gpolygon_tbl USING gist (f1); CREATE INDEX ggcircleind ON gcircle_tbl USING gist (f1); +-- +-- SP-GiST +-- +CREATE TABLE quad_point_tbl AS + SELECT point(unique1,unique2) AS p FROM tenk1; +INSERT INTO quad_point_tbl + SELECT '(333.0,400.0)'::point FROM generate_series(1,1000); +CREATE INDEX sp_quad_ind ON quad_point_tbl USING spgist (p); +CREATE TABLE kd_point_tbl AS SELECT * FROM quad_point_tbl; +CREATE INDEX sp_kd_ind ON kd_point_tbl USING spgist (p kd_point_ops); +CREATE TABLE suffix_text_tbl AS + SELECT name AS t FROM road; 
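As an aside on the internal API declared in spgist_private.h above: the comment on SpGistGetBuffer spells out its calling convention, and spgdoinsert is declared as the per-tuple insertion routine. The following hypothetical C sketch is not code from this patch; the sketch_* names, parent_blkno, and the include list are invented for illustration, and only declarations actually shown in the hunk above (initSpGistState, spgdoinsert, SpGistGetBuffer, GBUF_INNER_PARITY, GBUF_LEAF) are relied on.

/*
 * Hypothetical sketch only, relying solely on the declarations shown in
 * the spgist_private.h hunk above.
 */
#include "postgres.h"
#include "access/spgist_private.h"
#include "utils/rel.h"

/* Insert one heap tuple's indexed datum into an SP-GiST index. */
static void
sketch_insert_one(Relation index, ItemPointer heapPtr, Datum leafDatum)
{
	SpGistState state;

	initSpGistState(&state, index);		/* look up opclass support info */
	spgdoinsert(index, &state, heapPtr, leafDatum);
}

/*
 * Pick a page for a new inner tuple with at least needSpace free bytes.
 * Per the comment above SpGistGetBuffer, an inner page is requested in
 * the triple-parity group of (parent block number + 1), following the
 * rule described in spgist/README; a leaf page would be requested with
 * GBUF_LEAF instead.
 */
static Buffer
sketch_get_inner_page(Relation index, BlockNumber parent_blkno,
					  int needSpace, bool *isNew)
{
	return SpGistGetBuffer(index,
						   GBUF_INNER_PARITY(parent_blkno + 1),
						   needSpace,
						   isNew);
}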
+INSERT INTO suffix_text_tbl + SELECT '0123456789abcdef' FROM generate_series(1,1000); +INSERT INTO suffix_text_tbl VALUES ('0123456789abcde'); +INSERT INTO suffix_text_tbl VALUES ('0123456789abcdefF'); +CREATE INDEX sp_suff_ind ON suffix_text_tbl USING spgist (t); +-- +-- Test GiST and SP-GiST indexes +-- -- get non-indexed results for comparison purposes SET enable_seqscan = ON; SET enable_indexscan = OFF; @@ -207,22 +227,141 @@ SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0 (10,10) (4 rows) +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + count +------- + 1057 +(1 row) + +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + count +------- + 1057 +(1 row) + +SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)'; + count +------- + 6000 +(1 row) + +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; + count +------- + 4999 +(1 row) + +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; + count +------- + 5000 +(1 row) + +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; + count +------- + 5999 +(1 row) + +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; + count +------- + 1 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; + count +------- + 1000 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; + count +------- + 1 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; + count +------- + 1 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; + count +------- + 1705 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; + count +------- + 1705 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; + count +------- + 1706 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; + count +------- + 1706 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; + count +------- + 1 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; + count +------- + 2 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; + count +------- + 50 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; + count +------- + 50 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; + count +------- + 48 +(1 row) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; + count +------- + 48 +(1 row) + +-- Now check the results from plain indexscan SET enable_seqscan = OFF; SET enable_indexscan = ON; -SET enable_bitmapscan = ON; +SET enable_bitmapscan = OFF; EXPLAIN (COSTS OFF) SELECT * FROM fast_emp4000 WHERE home_base @ '(200,200),(2000,1000)'::box ORDER BY (home_base[0])[0]; - QUERY PLAN ----------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------- Sort Sort Key: ((home_base[0])[0]) - -> Bitmap Heap Scan on fast_emp4000 - Recheck Cond: (home_base @ '(2000,1000),(200,200)'::box) - -> Bitmap Index Scan on grect2ind - Index Cond: (home_base @ '(2000,1000),(200,200)'::box) -(6 rows) + -> Index Scan using grect2ind on fast_emp4000 + Index Cond: (home_base @ '(2000,1000),(200,200)'::box) +(4 rows) SELECT * FROM fast_emp4000 WHERE home_base @ '(200,200),(2000,1000)'::box @@ -235,14 +374,12 @@ SELECT * FROM fast_emp4000 EXPLAIN (COSTS OFF) SELECT count(*) FROM fast_emp4000 WHERE home_base && '(1000,1000,0,0)'::box; - QUERY PLAN 
-------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on fast_emp4000 - Recheck Cond: (home_base && '(1000,1000),(0,0)'::box) - -> Bitmap Index Scan on grect2ind - Index Cond: (home_base && '(1000,1000),(0,0)'::box) -(5 rows) + -> Index Scan using grect2ind on fast_emp4000 + Index Cond: (home_base && '(1000,1000),(0,0)'::box) +(3 rows) SELECT count(*) FROM fast_emp4000 WHERE home_base && '(1000,1000,0,0)'::box; count @@ -252,14 +389,12 @@ SELECT count(*) FROM fast_emp4000 WHERE home_base && '(1000,1000,0,0)'::box; EXPLAIN (COSTS OFF) SELECT count(*) FROM fast_emp4000 WHERE home_base IS NULL; - QUERY PLAN ------------------------------------------------ + QUERY PLAN +-------------------------------------------------- Aggregate - -> Bitmap Heap Scan on fast_emp4000 - Recheck Cond: (home_base IS NULL) - -> Bitmap Index Scan on grect2ind - Index Cond: (home_base IS NULL) -(5 rows) + -> Index Scan using grect2ind on fast_emp4000 + Index Cond: (home_base IS NULL) +(3 rows) SELECT count(*) FROM fast_emp4000 WHERE home_base IS NULL; count @@ -308,14 +443,12 @@ SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) EXPLAIN (COSTS OFF) SELECT count(*) FROM gpolygon_tbl WHERE f1 && '(1000,1000,0,0)'::polygon; - QUERY PLAN ------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------ Aggregate - -> Bitmap Heap Scan on gpolygon_tbl - Recheck Cond: (f1 && '((1000,1000),(0,0))'::polygon) - -> Bitmap Index Scan on ggpolygonind - Index Cond: (f1 && '((1000,1000),(0,0))'::polygon) -(5 rows) + -> Index Scan using ggpolygonind on gpolygon_tbl + Index Cond: (f1 && '((1000,1000),(0,0))'::polygon) +(3 rows) SELECT count(*) FROM gpolygon_tbl WHERE f1 && '(1000,1000,0,0)'::polygon; count @@ -325,14 +458,12 @@ SELECT count(*) FROM gpolygon_tbl WHERE f1 && '(1000,1000,0,0)'::polygon; EXPLAIN (COSTS OFF) SELECT count(*) FROM gcircle_tbl WHERE f1 && '<(500,500),500>'::circle; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on gcircle_tbl - Recheck Cond: (f1 && '<(500,500),500>'::circle) - -> Bitmap Index Scan on ggcircleind - Index Cond: (f1 && '<(500,500),500>'::circle) -(5 rows) + -> Index Scan using ggcircleind on gcircle_tbl + Index Cond: (f1 && '<(500,500),500>'::circle) +(3 rows) SELECT count(*) FROM gcircle_tbl WHERE f1 && '<(500,500),500>'::circle; count @@ -547,6 +678,412 @@ SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0 (10,10) (4 rows) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Index Scan using sp_quad_ind on quad_point_tbl + Index Cond: (p <@ '(1000,1000),(200,200)'::box) +(3 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + count +------- + 1057 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Index Scan using sp_quad_ind on quad_point_tbl + Index Cond: ('(1000,1000),(200,200)'::box @> p) +(3 rows) + +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + count +------- + 1057 +(1 row) + +EXPLAIN 
(COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Index Scan using sp_quad_ind on quad_point_tbl + Index Cond: (p << '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)'; + count +------- + 6000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Index Scan using sp_quad_ind on quad_point_tbl + Index Cond: (p >> '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; + count +------- + 4999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Index Scan using sp_quad_ind on quad_point_tbl + Index Cond: (p <^ '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; + count +------- + 5000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Index Scan using sp_quad_ind on quad_point_tbl + Index Cond: (p >^ '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; + count +------- + 5999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Index Scan using sp_quad_ind on quad_point_tbl + Index Cond: (p ~= '(4585,365)'::point) +(3 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Index Scan using sp_kd_ind on kd_point_tbl + Index Cond: (p <@ '(1000,1000),(200,200)'::box) +(3 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + count +------- + 1057 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Index Scan using sp_kd_ind on kd_point_tbl + Index Cond: ('(1000,1000),(200,200)'::box @> p) +(3 rows) + +SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p; + count +------- + 1057 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)'; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Index Scan using sp_kd_ind on kd_point_tbl + Index Cond: (p << '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)'; + count +------- + 6000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)'; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Index Scan using sp_kd_ind on kd_point_tbl + Index Cond: (p >> '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)'; + count +------- + 4999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)'; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Index Scan using sp_kd_ind on 
kd_point_tbl + Index Cond: (p <^ '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)'; + count +------- + 5000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)'; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Index Scan using sp_kd_ind on kd_point_tbl + Index Cond: (p >^ '(5000,4000)'::point) +(3 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)'; + count +------- + 5999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)'; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Index Scan using sp_kd_ind on kd_point_tbl + Index Cond: (p ~= '(4585,365)'::point) +(3 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t = '0123456789abcdef'::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; + count +------- + 1000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t = '0123456789abcde'::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t = '0123456789abcdefF'::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; + QUERY PLAN +---------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t < 'Aztec Ct '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; + count +------- + 1705 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t ~<~ 'Aztec Ct '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; + count +------- + 1705 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; + QUERY PLAN +----------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t <= 'Aztec Ct '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; + count +------- + 1706 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t ~<=~ 'Aztec Ct '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; + count +------- + 1706 +(1 
row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; + QUERY PLAN +---------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t = 'Aztec Ct '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; + QUERY PLAN +---------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t = 'Worth St '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; + count +------- + 2 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; + QUERY PLAN +----------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t >= 'Worth St '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; + count +------- + 50 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t ~>=~ 'Worth St '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; + count +------- + 50 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; + QUERY PLAN +---------------------------------------------------------------------- + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t > 'Worth St '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; + count +------- + 48 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Index Scan using sp_suff_ind on suffix_text_tbl + Index Cond: (t ~>~ 'Worth St '::text) +(3 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; + count +------- + 48 +(1 row) + +-- Now check the results from bitmap indexscan SET enable_seqscan = OFF; SET enable_indexscan = OFF; SET enable_bitmapscan = ON; @@ -571,6 +1108,465 @@ SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0 (10,10) (4 rows) +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on quad_point_tbl + Recheck Cond: (p <@ '(1000,1000),(200,200)'::box) + -> Bitmap Index Scan on sp_quad_ind + Index Cond: (p <@ '(1000,1000),(200,200)'::box) +(5 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + count +------- + 1057 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on quad_point_tbl + Recheck Cond: ('(1000,1000),(200,200)'::box @> p) + -> Bitmap Index Scan on sp_quad_ind + Index Cond: ('(1000,1000),(200,200)'::box @> p) +(5 rows) + +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + count +------- + 1057 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM 
quad_point_tbl WHERE p << '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on quad_point_tbl + Recheck Cond: (p << '(5000,4000)'::point) + -> Bitmap Index Scan on sp_quad_ind + Index Cond: (p << '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)'; + count +------- + 6000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on quad_point_tbl + Recheck Cond: (p >> '(5000,4000)'::point) + -> Bitmap Index Scan on sp_quad_ind + Index Cond: (p >> '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; + count +------- + 4999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on quad_point_tbl + Recheck Cond: (p <^ '(5000,4000)'::point) + -> Bitmap Index Scan on sp_quad_ind + Index Cond: (p <^ '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; + count +------- + 5000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on quad_point_tbl + Recheck Cond: (p >^ '(5000,4000)'::point) + -> Bitmap Index Scan on sp_quad_ind + Index Cond: (p >^ '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; + count +------- + 5999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on quad_point_tbl + Recheck Cond: (p ~= '(4585,365)'::point) + -> Bitmap Index Scan on sp_quad_ind + Index Cond: (p ~= '(4585,365)'::point) +(5 rows) + +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on kd_point_tbl + Recheck Cond: (p <@ '(1000,1000),(200,200)'::box) + -> Bitmap Index Scan on sp_kd_ind + Index Cond: (p <@ '(1000,1000),(200,200)'::box) +(5 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + count +------- + 1057 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p; + QUERY PLAN +--------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on kd_point_tbl + Recheck Cond: ('(1000,1000),(200,200)'::box @> p) + -> Bitmap Index Scan on sp_kd_ind + Index Cond: ('(1000,1000),(200,200)'::box @> p) +(5 rows) + +SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p; + count +------- + 1057 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on kd_point_tbl + Recheck Cond: (p << '(5000,4000)'::point) + -> Bitmap Index Scan on sp_kd_ind + Index Cond: (p << '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)'; + count +------- + 6000 +(1 row) + 
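The point-operator cases above, such as p << '(5000, 4000)', rely on spg_quad_inner_consistent to prune whole quadrants during the descent: a quadrant has to be visited only if its region could still contain a point satisfying the operator (the k-d tree opclass applies the same idea one coordinate at a time). The standalone C sketch below illustrates that test for the "strictly left of" (<<) case; it is not the patch's spg_quad_inner_consistent, and the quadrant numbering and all names in it are invented for the example.

#include <stdbool.h>
#include <stdio.h>

typedef struct { double x, y; } Pt;

/*
 * Quadrants relative to a centroid c, using an arbitrary numbering:
 *   0: x <= c.x, y <= c.y    1: x > c.x, y <= c.y
 *   2: x <= c.x, y >  c.y    3: x > c.x, y >  c.y
 *
 * For "p << q" (p strictly left of q), a left-side quadrant is unbounded
 * toward -infinity in x and so can always hold a match; a right-side
 * quadrant can hold one only if the centroid itself lies left of q.
 */
static bool
quadrant_may_contain_left_of(int quadrant, const Pt *c, const Pt *q)
{
	bool	right_side = (quadrant == 1 || quadrant == 3);	/* x > c.x */

	return right_side ? (c->x < q->x) : true;
}

int
main(void)
{
	Pt		c = {5000, 4000};	/* centroid stored in an inner tuple */
	Pt		q = {5000, 4000};	/* query point of "p << q" */
	int		i;

	for (i = 0; i < 4; i++)
		printf("descend into quadrant %d: %s\n", i,
			   quadrant_may_contain_left_of(i, &c, &q) ? "yes" : "no");
	return 0;
}

The same region-versus-operator reasoning carries over to the >>, <^, >^, ~= and <@ box-containment tests exercised above.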
+EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on kd_point_tbl + Recheck Cond: (p >> '(5000,4000)'::point) + -> Bitmap Index Scan on sp_kd_ind + Index Cond: (p >> '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)'; + count +------- + 4999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on kd_point_tbl + Recheck Cond: (p <^ '(5000,4000)'::point) + -> Bitmap Index Scan on sp_kd_ind + Index Cond: (p <^ '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)'; + count +------- + 5000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)'; + QUERY PLAN +------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on kd_point_tbl + Recheck Cond: (p >^ '(5000,4000)'::point) + -> Bitmap Index Scan on sp_kd_ind + Index Cond: (p >^ '(5000,4000)'::point) +(5 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)'; + count +------- + 5999 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)'; + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on kd_point_tbl + Recheck Cond: (p ~= '(4585,365)'::point) + -> Bitmap Index Scan on sp_kd_ind + Index Cond: (p ~= '(4585,365)'::point) +(5 rows) + +SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; + QUERY PLAN +---------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t = '0123456789abcdef'::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t = '0123456789abcdef'::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; + count +------- + 1000 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t = '0123456789abcde'::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t = '0123456789abcde'::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; + QUERY PLAN +----------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t = '0123456789abcdefF'::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t = '0123456789abcdefF'::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; + QUERY PLAN +---------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t < 'Aztec Ct '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t < 'Aztec Ct '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; + count +------- + 1705 +(1 row) + 
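The suffix-tree tests around this point probe each boundary value with both the collation-aware text comparisons (<, <=, >=, >) and their byte-wise "pattern" counterparts (~<~, ~<=~, ~>=~, ~>~). Roughly speaking, the difference is that of strcoll() versus strcmp() in C; the hypothetical snippet below (not PostgreSQL code) shows where the two can disagree. In the expected output the two operator families return identical counts, as they would under a byte-order collation such as C.

#include <locale.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *a = "Aztec Ct";
	const char *b = "aztec ct";

	/* Byte-wise order, roughly what the ~<~ operator family uses:
	 * 'A' (0x41) sorts before 'a' (0x61). */
	printf("strcmp:  %d\n", strcmp(a, b));

	/* Collation-aware order, roughly what plain < / <= / > / >= use.
	 * setlocale() fails (returns NULL) if the named locale is not
	 * installed, in which case strcoll() keeps behaving like strcmp(). */
	if (setlocale(LC_COLLATE, "en_US.UTF-8") != NULL)
		printf("strcoll: %d\n", strcoll(a, b));
	return 0;
}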
+EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; + QUERY PLAN +------------------------------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t ~<~ 'Aztec Ct '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t ~<~ 'Aztec Ct '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; + count +------- + 1705 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t <= 'Aztec Ct '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t <= 'Aztec Ct '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; + count +------- + 1706 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; + QUERY PLAN +------------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t ~<=~ 'Aztec Ct '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t ~<=~ 'Aztec Ct '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; + count +------- + 1706 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; + QUERY PLAN +---------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t = 'Aztec Ct '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t = 'Aztec Ct '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; + QUERY PLAN +---------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t = 'Worth St '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t = 'Worth St '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; + count +------- + 2 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; + QUERY PLAN +----------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t >= 'Worth St '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t >= 'Worth St '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; + count +------- + 50 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; + QUERY PLAN +------------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t ~>=~ 'Worth St '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t ~>=~ 'Worth St '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; + count +------- + 50 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; + QUERY PLAN +---------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t > 'Worth St '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t > 'Worth St '::text) +(5 rows) + +SELECT 
count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; + count +------- + 48 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; + QUERY PLAN +------------------------------------------------------------------------------ + Aggregate + -> Bitmap Heap Scan on suffix_text_tbl + Recheck Cond: (t ~>~ 'Worth St '::text) + -> Bitmap Index Scan on sp_suff_ind + Index Cond: (t ~>~ 'Worth St '::text) +(5 rows) + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; + count +------- + 48 +(1 row) + RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index a0ffd77e0ed..8e4004ed311 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1053,7 +1053,22 @@ ORDER BY 1, 2, 3; 2742 | 2 | @@@ 2742 | 3 | <@ 2742 | 4 | = -(43 rows) + 4000 | 1 | << + 4000 | 1 | ~<~ + 4000 | 2 | ~<=~ + 4000 | 3 | = + 4000 | 4 | ~>=~ + 4000 | 5 | >> + 4000 | 5 | ~>~ + 4000 | 6 | ~= + 4000 | 8 | <@ + 4000 | 10 | <^ + 4000 | 11 | < + 4000 | 11 | >^ + 4000 | 12 | <= + 4000 | 14 | >= + 4000 | 15 | > +(58 rows) -- Check that all opclass search operators have selectivity estimators. -- This is not absolutely required, but it seems a reasonable thing @@ -1077,6 +1092,24 @@ WHERE NOT EXISTS(SELECT 1 FROM pg_amop AS p2 ---------+----------- (0 rows) +-- Check that each operator listed in pg_amop has an associated opclass, +-- that is one whose opcintype matches oprleft (possibly by coercion). +-- Otherwise the operator is useless because it cannot be matched to an index. +-- (In principle it could be useful to list such operators in multiple-datatype +-- btree opfamilies, but in practice you'd expect there to be an opclass for +-- every datatype the family knows about.) +SELECT p1.amopfamily, p1.amopstrategy, p1.amopopr +FROM pg_amop AS p1 +WHERE NOT EXISTS(SELECT 1 FROM pg_opclass AS p2 + WHERE p2.opcfamily = p1.amopfamily + AND binary_coercible(p2.opcintype, p1.amoplefttype)); + amopfamily | amopstrategy | amopopr +------------+--------------+--------- + 1029 | 27 | 433 + 1029 | 47 | 757 + 1029 | 67 | 759 +(3 rows) + -- Operators that are primary members of opclasses must be immutable (else -- it suggests that the index ordering isn't fixed). Operators that are -- cross-type members need only be stable, since they are just shorthands @@ -1297,6 +1330,27 @@ ORDER BY 1; 2226 | 1 | hashint4 | cid_ops (6 rows) +-- We can also check SP-GiST carefully, since the support routine signatures +-- are independent of the datatype being indexed. 
+SELECT p1.amprocfamily, p1.amprocnum, + p2.oid, p2.proname, + p3.opfname +FROM pg_amproc AS p1, pg_proc AS p2, pg_opfamily AS p3 +WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'spgist') + AND p1.amprocfamily = p3.oid AND p1.amproc = p2.oid AND + (CASE WHEN amprocnum = 1 OR amprocnum = 2 OR amprocnum = 3 OR amprocnum = 4 + THEN prorettype != 'void'::regtype OR proretset OR pronargs != 2 + OR proargtypes[0] != 'internal'::regtype + OR proargtypes[1] != 'internal'::regtype + WHEN amprocnum = 5 + THEN prorettype != 'bool'::regtype OR proretset OR pronargs != 2 + OR proargtypes[0] != 'internal'::regtype + OR proargtypes[1] != 'internal'::regtype + ELSE true END); + amprocfamily | amprocnum | oid | proname | opfname +--------------+-----------+-----+---------+--------- +(0 rows) + -- Support routines that are primary members of opfamilies must be immutable -- (else it suggests that the index ordering isn't fixed). But cross-type -- members need only be stable, since they are just shorthands diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index cb468e58b91..9cae9d8bf10 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -63,6 +63,7 @@ SELECT relname, relhasindex int8_tbl | f interval_tbl | f iportaltest | f + kd_point_tbl | t log_table | f lseg_tbl | f main_table | f @@ -134,6 +135,7 @@ SELECT relname, relhasindex pg_user_mapping | t point_tbl | t polygon_tbl | t + quad_point_tbl | t ramp | f real_city | f reltime_tbl | f @@ -149,6 +151,7 @@ SELECT relname, relhasindex sql_sizing_profiles | f stud_emp | f student | f + suffix_text_tbl | t tenk1 | t tenk2 | t test_range_excl | t @@ -161,7 +164,7 @@ SELECT relname, relhasindex timetz_tbl | f tinterval_tbl | f varchar_tbl | f -(150 rows) +(153 rows) -- -- another sanity check: every system catalog that has OIDs should have diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index 45bc926407d..b57c5546ded 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -636,6 +636,7 @@ SELECT user_relns() AS user_relns int8_tbl interval_tbl iportaltest + kd_point_tbl log_table lseg_tbl main_table @@ -657,6 +658,7 @@ SELECT user_relns() AS user_relns person point_tbl polygon_tbl + quad_point_tbl ramp random_tbl real_city @@ -668,6 +670,7 @@ SELECT user_relns() AS user_relns stud_emp student subselect_tbl + suffix_text_tbl tenk1 tenk2 test_range_excl @@ -682,7 +685,7 @@ SELECT user_relns() AS user_relns toyemp varchar_tbl xacttest -(104 rows) +(107 rows) SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer'))); name diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 85cf23ccb8f..babde51d2c3 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -92,6 +92,36 @@ CREATE INDEX ggpolygonind ON gpolygon_tbl USING gist (f1); CREATE INDEX ggcircleind ON gcircle_tbl USING gist (f1); +-- +-- SP-GiST +-- + +CREATE TABLE quad_point_tbl AS + SELECT point(unique1,unique2) AS p FROM tenk1; + +INSERT INTO quad_point_tbl + SELECT '(333.0,400.0)'::point FROM generate_series(1,1000); + +CREATE INDEX sp_quad_ind ON quad_point_tbl USING spgist (p); + +CREATE TABLE kd_point_tbl AS SELECT * FROM quad_point_tbl; + +CREATE INDEX sp_kd_ind ON kd_point_tbl USING spgist (p kd_point_ops); + +CREATE TABLE suffix_text_tbl AS + SELECT name AS t FROM road; + +INSERT INTO suffix_text_tbl + SELECT 
'0123456789abcdef' FROM generate_series(1,1000); +INSERT INTO suffix_text_tbl VALUES ('0123456789abcde'); +INSERT INTO suffix_text_tbl VALUES ('0123456789abcdefF'); + +CREATE INDEX sp_suff_ind ON suffix_text_tbl USING spgist (t); + +-- +-- Test GiST and SP-GiST indexes +-- + -- get non-indexed results for comparison purposes SET enable_seqscan = ON; @@ -142,9 +172,50 @@ SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + +SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)'; + +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; + +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; + +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; + +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; + +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; + +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; + +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; + +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; + +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; + +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; + +SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; + +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; + +-- Now check the results from plain indexscan SET enable_seqscan = OFF; SET enable_indexscan = ON; -SET enable_bitmapscan = ON; +SET enable_bitmapscan = OFF; EXPLAIN (COSTS OFF) SELECT * FROM fast_emp4000 @@ -234,6 +305,115 @@ EXPLAIN (COSTS OFF) SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)'; +SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; +SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; +SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; +SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; +SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)'; +SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + 
+EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p; +SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)'; +SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)'; +SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)'; +SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)'; +SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)'; +SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; +SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; +SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; +SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; +SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; +SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; +SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; +SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; +SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; +SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St '; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; +SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St '; + +-- Now check the results from bitmap indexscan SET enable_seqscan = OFF; SET enable_indexscan = OFF; SET enable_bitmapscan = ON; @@ -242,6 +422,114 @@ EXPLAIN (COSTS OFF) SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; +SELECT count(*) FROM quad_point_tbl WHERE p <@ box '(200,200,1000,1000)'; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; +SELECT count(*) FROM quad_point_tbl WHERE box '(200,200,1000,1000)' @> p; + +EXPLAIN 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)';
+SELECT count(*) FROM quad_point_tbl WHERE p << '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)';
+SELECT count(*) FROM quad_point_tbl WHERE p >> '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)';
+SELECT count(*) FROM quad_point_tbl WHERE p <^ '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)';
+SELECT count(*) FROM quad_point_tbl WHERE p >^ '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)';
+SELECT count(*) FROM quad_point_tbl WHERE p ~= '(4585, 365)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)';
+SELECT count(*) FROM kd_point_tbl WHERE p <@ box '(200,200,1000,1000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p;
+SELECT count(*) FROM kd_point_tbl WHERE box '(200,200,1000,1000)' @> p;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)';
+SELECT count(*) FROM kd_point_tbl WHERE p << '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)';
+SELECT count(*) FROM kd_point_tbl WHERE p >> '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)';
+SELECT count(*) FROM kd_point_tbl WHERE p <^ '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)';
+SELECT count(*) FROM kd_point_tbl WHERE p >^ '(5000, 4000)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)';
+SELECT count(*) FROM kd_point_tbl WHERE p ~= '(4585, 365)';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef';
+SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdef';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde';
+SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcde';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF';
+SELECT count(*) FROM suffix_text_tbl WHERE t = '0123456789abcdefF';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct ';
+SELECT count(*) FROM suffix_text_tbl WHERE t < 'Aztec Ct ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct ';
+SELECT count(*) FROM suffix_text_tbl WHERE t ~<~ 'Aztec Ct ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct ';
+SELECT count(*) FROM suffix_text_tbl WHERE t <= 'Aztec Ct ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct ';
+SELECT count(*) FROM suffix_text_tbl WHERE t ~<=~ 'Aztec Ct ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct ';
+SELECT count(*) FROM suffix_text_tbl WHERE t = 'Aztec Ct ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St ';
+SELECT count(*) FROM suffix_text_tbl WHERE t = 'Worth St ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St ';
+SELECT count(*) FROM suffix_text_tbl WHERE t >= 'Worth St ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St ';
+SELECT count(*) FROM suffix_text_tbl WHERE t ~>=~ 'Worth St ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St ';
+SELECT count(*) FROM suffix_text_tbl WHERE t > 'Worth St ';
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St ';
+SELECT count(*) FROM suffix_text_tbl WHERE t ~>~ 'Worth St ';
+
 RESET enable_seqscan;
 RESET enable_indexscan;
 RESET enable_bitmapscan;
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index 6a79ea180c1..e29148fd5bd 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -831,6 +831,19 @@ WHERE NOT EXISTS(SELECT 1 FROM pg_amop AS p2
                  WHERE p2.amopfamily = p1.opcfamily
                    AND binary_coercible(p1.opcintype, p2.amoplefttype));
+-- Check that each operator listed in pg_amop has an associated opclass,
+-- that is one whose opcintype matches oprleft (possibly by coercion).
+-- Otherwise the operator is useless because it cannot be matched to an index.
+-- (In principle it could be useful to list such operators in multiple-datatype
+-- btree opfamilies, but in practice you'd expect there to be an opclass for
+-- every datatype the family knows about.)
+
+SELECT p1.amopfamily, p1.amopstrategy, p1.amopopr
+FROM pg_amop AS p1
+WHERE NOT EXISTS(SELECT 1 FROM pg_opclass AS p2
+                 WHERE p2.opcfamily = p1.amopfamily
+                   AND binary_coercible(p2.opcintype, p1.amoplefttype));
+
 -- Operators that are primary members of opclasses must be immutable (else
 -- it suggests that the index ordering isn't fixed). Operators that are
 -- cross-type members need only be stable, since they are just shorthands
@@ -1018,6 +1031,25 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
     OR amproclefttype != amprocrighttype)
 ORDER BY 1;
+-- We can also check SP-GiST carefully, since the support routine signatures
+-- are independent of the datatype being indexed.
+
+SELECT p1.amprocfamily, p1.amprocnum,
+    p2.oid, p2.proname,
+    p3.opfname
+FROM pg_amproc AS p1, pg_proc AS p2, pg_opfamily AS p3
+WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'spgist')
+    AND p1.amprocfamily = p3.oid AND p1.amproc = p2.oid AND
+    (CASE WHEN amprocnum = 1 OR amprocnum = 2 OR amprocnum = 3 OR amprocnum = 4
+          THEN prorettype != 'void'::regtype OR proretset OR pronargs != 2
+               OR proargtypes[0] != 'internal'::regtype
+               OR proargtypes[1] != 'internal'::regtype
+          WHEN amprocnum = 5
+          THEN prorettype != 'bool'::regtype OR proretset OR pronargs != 2
+               OR proargtypes[0] != 'internal'::regtype
+               OR proargtypes[1] != 'internal'::regtype
+          ELSE true END);
+
 -- Support routines that are primary members of opfamilies must be immutable
 -- (else it suggests that the index ordering isn't fixed). But cross-type
 -- members need only be stable, since they are just shorthands
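
The pg_amop/pg_opclass cross-check added in the first opr_sanity.sql hunk above reports any operator whose left input type has no matching opclass in the same family. As an illustrative sketch only (not part of the patch, and assuming a server built with SP-GiST installed), the pairing it expects can be browsed from the other direction, listing each SP-GiST operator class together with the operators its family provides:

-- Illustrative sketch only (not part of the patch): list SP-GiST opclasses
-- and the operators exposed by their families.
SELECT opc.opcname,
       opc.opcintype::regtype AS indexed_type,
       amop.amopstrategy,
       amop.amopopr::regoperator AS operator
FROM pg_opclass opc
JOIN pg_am am ON am.oid = opc.opcmethod
JOIN pg_amop amop ON amop.amopfamily = opc.opcfamily
WHERE am.amname = 'spgist'
ORDER BY opc.opcname, amop.amopstrategy;

Any pg_amop row whose amoplefttype cannot be matched (via binary coercion) to some opclass's opcintype in the same family would instead show up in the sanity query's output.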
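
The second opr_sanity.sql hunk encodes the SP-GiST support-routine contract: each opfamily is expected to supply support procedures numbered 1 through 5, all taking two internal arguments, with numbers 1-4 returning void and number 5 returning bool (by convention these are the config, choose, picksplit, inner_consistent, and leaf_consistent callbacks, though the check only verifies signatures). The query below is a hedged sketch for inspecting those catalog entries; it is not part of the patch and assumes SP-GiST is present.

-- Illustrative sketch only (not part of the patch): show each SP-GiST
-- opfamily's support procedures and their signatures.
SELECT opf.opfname,
       ap.amprocnum,
       ap.amproc::regprocedure AS support_proc
FROM pg_amproc ap
JOIN pg_opfamily opf ON opf.oid = ap.amprocfamily
JOIN pg_am am ON am.oid = opf.opfmethod
WHERE am.amname = 'spgist'
ORDER BY opf.opfname, ap.amprocnum;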