aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2013-11-27 19:21:23 +0200
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2013-11-27 19:21:23 +0200
commit631118fe1e8f66fd15eb0013dd9940d9146a5096 (patch)
treebff4834fef6fe4726a76babd324fc8a6ec5c5994 /src/include
parentce5326eed386959aac7a322880896ddeade7fd52 (diff)
downloadpostgresql-631118fe1e8f66fd15eb0013dd9940d9146a5096.tar.gz
postgresql-631118fe1e8f66fd15eb0013dd9940d9146a5096.zip
Get rid of the post-recovery cleanup step of GIN page splits.
Replace it with an approach similar to what GiST uses: when a page is split, the left sibling is marked with a flag indicating that the parent hasn't been updated yet. When the parent is updated, the flag is cleared. If an insertion steps on a page with the flag set, it will finish the split before proceeding with the insertion. The post-recovery cleanup mechanism was never totally reliable, as insertion to the parent could fail, e.g. because of running out of memory or disk space, leaving the tree in an inconsistent state. This also divides the responsibility of WAL-logging more clearly between the generic ginbtree.c code and the parts specific to entry and posting trees. There is now a common WAL record format for insertions and deletions, which is written by ginbtree.c, followed by a tree-specific payload, which is returned by the placeToPage and splitPage callbacks.
Diffstat (limited to 'src/include')
-rw-r--r--src/include/access/gin.h1
-rw-r--r--src/include/access/gin_private.h81
-rw-r--r--src/include/access/rmgrlist.h2
-rw-r--r--src/include/access/xlog_internal.h2
4 files changed, 60 insertions, 26 deletions
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index b6cb48da287..7dcb0e0f20b 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -58,6 +58,5 @@ extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
extern void gin_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void gin_xlog_startup(void);
extern void gin_xlog_cleanup(void);
-extern bool gin_safe_restartpoint(void);
#endif /* GIN_H */
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index bd407fe342a..714c8ca9841 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -48,6 +48,7 @@ typedef GinPageOpaqueData *GinPageOpaque;
#define GIN_META (1 << 3)
#define GIN_LIST (1 << 4)
#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
+#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not updated */
/* Page numbers of fixed-location pages */
#define GIN_METAPAGE_BLKNO (0)
@@ -119,6 +120,7 @@ typedef struct GinMetaPageData
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
#define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
+#define GinPageIsIncompleteSplit(page) ( GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT)
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
@@ -336,41 +338,77 @@ typedef struct ginxlogInsert
{
RelFileNode node;
BlockNumber blkno;
- BlockNumber updateBlkno;
+ uint16 flags; /* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
OffsetNumber offset;
- bool isDelete;
- bool isData;
- bool isLeaf;
- OffsetNumber nitem;
/*
- * follows: tuples or ItemPointerData or PostingItem or list of
- * ItemPointerData
+ * FOLLOWS:
+ *
+ * 1. if not leaf page, block numbers of the left and right child pages
+ * whose split this insertion finishes. As BlockIdData[2] (beware of adding
+ * fields before this that would make them not 16-bit aligned)
+ *
+ * 2. one of the following structs, depending on tree type.
+ *
+ * NB: the below structs are only 16-bit aligned when appended to a
+ * ginxlogInsert struct! Beware of adding fields to them that require
+ * stricter alignment.
*/
} ginxlogInsert;
+typedef struct
+{
+ bool isDelete;
+ IndexTupleData tuple; /* variable length */
+} ginxlogInsertEntry;
+
+typedef struct
+{
+ OffsetNumber nitem;
+ ItemPointerData items[1]; /* variable length */
+} ginxlogInsertDataLeaf;
+
+/* In an insert to an internal data page, the payload is a PostingItem */
+
+
#define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit
{
RelFileNode node;
BlockNumber lblkno;
- BlockNumber rootBlkno;
BlockNumber rblkno;
- BlockNumber rrlink;
+ BlockNumber rrlink; /* right link, or root's blocknumber if root split */
+ BlockNumber leftChildBlkno; /* valid on a non-leaf split */
+ BlockNumber rightChildBlkno;
+ uint16 flags;
+
+ /* follows: one of the following structs */
+} ginxlogSplit;
+
+/*
+ * Flags used in ginxlogInsert and ginxlogSplit records
+ */
+#define GIN_INSERT_ISDATA 0x01 /* for both insert and split records */
+#define GIN_INSERT_ISLEAF 0x02 /* .. */
+#define GIN_SPLIT_ROOT 0x04 /* only for split records */
+
+typedef struct
+{
OffsetNumber separator;
OffsetNumber nitem;
- bool isData;
- bool isLeaf;
- bool isRootSplit;
+ /* FOLLOWS: IndexTuples */
+} ginxlogSplitEntry;
- BlockNumber leftChildBlkno;
- BlockNumber updateBlkno;
+typedef struct
+{
+ OffsetNumber separator;
+ OffsetNumber nitem;
+ ItemPointerData rightbound;
- ItemPointerData rightbound; /* used only in posting tree */
- /* follows: list of tuple or ItemPointerData or PostingItem */
-} ginxlogSplit;
+ /* FOLLOWS: array of ItemPointers (for leaf) or PostingItems (non-leaf) */
+} ginxlogSplitData;
#define XLOG_GIN_VACUUM_PAGE 0x40
@@ -488,7 +526,7 @@ typedef struct GinBtreeData
bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
void *(*prepareDownlink) (GinBtree, Buffer);
- void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer);
+ void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
bool isData;
@@ -535,9 +573,6 @@ extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
extern void freeGinBtreeStack(GinBtreeStack *stack);
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
void *insertdata, GinStatsData *buildStats);
-extern void ginFindParents(GinBtree btree, GinBtreeStack *stack);
-extern void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
- GinStatsData *buildStats);
/* ginentrypage.c */
extern IndexTuple GinFormTuple(GinState *ginstate,
@@ -547,7 +582,7 @@ extern void GinShortenTuple(IndexTuple itup, uint32 nipd);
extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
Datum key, GinNullCategory category,
GinState *ginstate);
-extern void ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
+extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
/* gindatapage.c */
extern BlockNumber createPostingTree(Relation index,
@@ -560,7 +595,7 @@ extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats);
extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
-extern void ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
+extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
/* ginscan.c */
diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h
index 7ad71b32e2c..166689db102 100644
--- a/src/include/access/rmgrlist.h
+++ b/src/include/access/rmgrlist.h
@@ -38,7 +38,7 @@ PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL)
PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, NULL, NULL, NULL)
PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint)
PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, NULL, NULL, NULL)
-PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint)
+PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, NULL)
PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, NULL)
PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, NULL, NULL, NULL)
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup, NULL)
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index c3e173106fc..b6320eee3cf 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -55,7 +55,7 @@ typedef struct BkpBlock
/*
* Each page of XLOG file has a header like this:
*/
-#define XLOG_PAGE_MAGIC 0xD076 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD077 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{