aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/rmgrdesc/Makefile6
-rw-r--r--src/backend/access/rmgrdesc/genericdesc.c58
-rw-r--r--src/backend/access/transam/Makefile4
-rw-r--r--src/backend/access/transam/generic_xlog.c431
-rw-r--r--src/backend/access/transam/rmgr.c1
5 files changed, 495 insertions, 5 deletions
diff --git a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile
index c72a1f245d1..c0e38fdf17d 100644
--- a/src/backend/access/rmgrdesc/Makefile
+++ b/src/backend/access/rmgrdesc/Makefile
@@ -8,9 +8,9 @@ subdir = src/backend/access/rmgrdesc
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = brindesc.o clogdesc.o committsdesc.o dbasedesc.o gindesc.o gistdesc.o \
- hashdesc.o heapdesc.o mxactdesc.o nbtdesc.o relmapdesc.o \
- replorigindesc.o seqdesc.o smgrdesc.o spgdesc.o \
+OBJS = brindesc.o clogdesc.o committsdesc.o dbasedesc.o genericdesc.o \
+ gindesc.o gistdesc.o hashdesc.o heapdesc.o mxactdesc.o nbtdesc.o \
+ relmapdesc.o replorigindesc.o seqdesc.o smgrdesc.o spgdesc.o \
standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/rmgrdesc/genericdesc.c b/src/backend/access/rmgrdesc/genericdesc.c
new file mode 100644
index 00000000000..caa9a036481
--- /dev/null
+++ b/src/backend/access/rmgrdesc/genericdesc.c
@@ -0,0 +1,58 @@
+/*-------------------------------------------------------------------------
+ *
+ * genericdesc.c
+ * rmgr descriptor routines for access/transam/generic_xlog.c
+ *
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/rmgrdesc/genericdesc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/generic_xlog.h"
+#include "lib/stringinfo.h"
+#include "storage/relfilenode.h"
+
+/*
+ * Describe a generic xlog record by listing the page regions it overwrites.
+ *
+ * The record's data area is walked as a sequence of fragments, each laid out
+ * as (offset, length, 'length' payload bytes).  For every fragment the text
+ * "offset N, length M" is appended to buf, with "; " after every entry
+ * except the last.  The payload bytes are skipped, not printed.
+ *
+ * NOTE(review): this walks XLogRecGetData(), whereas GenericXLogFinish in
+ * generic_xlog.c attaches the deltas with XLogRegisterBufData().  Presumably
+ * that is per-block data which XLogRecGetData() does not return, in which
+ * case nothing would be printed here -- confirm.
+ */
+void
+generic_desc(StringInfo buf, XLogReaderState *record)
+{
+	Pointer		cursor = XLogRecGetData(record);
+	Pointer		limit = cursor + XLogRecGetDataLen(record);
+
+	while (cursor < limit)
+	{
+		OffsetNumber regionOffset;
+		OffsetNumber regionLength;
+
+		/* Fragment header: two OffsetNumbers, copied out with memcpy */
+		memcpy(&regionOffset, cursor, sizeof(regionOffset));
+		memcpy(&regionLength, cursor + sizeof(regionOffset),
+			   sizeof(regionLength));
+
+		/* Step over the header and the payload to the next fragment */
+		cursor += sizeof(regionOffset) + sizeof(regionLength) + regionLength;
+
+		if (cursor < limit)
+			appendStringInfo(buf, "offset %u, length %u; ", regionOffset, regionLength);
+		else
+			appendStringInfo(buf, "offset %u, length %u", regionOffset, regionLength);
+	}
+}
+
+/*
+ * Identification of generic xlog record: we don't distinguish any subtypes
+ * inside generic xlog records.
+ *
+ * The 'info' argument is ignored; the constant string "Generic" is returned
+ * for every record.
+ */
+const char *
+generic_identify(uint8 info)
+{
+ return "Generic";
+}
diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 94455b23f7e..16fbe47269a 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -12,8 +12,8 @@ subdir = src/backend/access/transam
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = clog.o commit_ts.o multixact.o parallel.o rmgr.o slru.o subtrans.o \
- timeline.o transam.o twophase.o twophase_rmgr.o varsup.o \
+OBJS = clog.o commit_ts.o generic_xlog.o multixact.o parallel.o rmgr.o slru.o \
+ subtrans.o timeline.o transam.o twophase.o twophase_rmgr.o varsup.o \
xact.o xlog.o xlogarchive.o xlogfuncs.o \
xloginsert.o xlogreader.o xlogutils.o
diff --git a/src/backend/access/transam/generic_xlog.c b/src/backend/access/transam/generic_xlog.c
new file mode 100644
index 00000000000..e62179d2fb5
--- /dev/null
+++ b/src/backend/access/transam/generic_xlog.c
@@ -0,0 +1,431 @@
+/*-------------------------------------------------------------------------
+ *
+ * generic_xlog.c
+ * Implementation of generic xlog records.
+ *
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/generic_xlog.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/generic_xlog.h"
+#include "access/xlogutils.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+
+/*-------------------------------------------------------------------------
+ * Internally, a delta between pages consists of a set of fragments. Each
+ * fragment represents changes made in a given region of a page. A fragment
+ * is made up as follows:
+ *
+ * - offset of page region (OffsetNumber)
+ * - length of page region (OffsetNumber)
+ * - data - the data to place into the region ('length' number of bytes)
+ *
+ * Unchanged regions of a page are not represented in its delta. As a
+ * result, a delta can be more compact than the full page image. But having
+ * an unchanged region in the middle of two fragments that is smaller than
+ * the fragment header (offset and length) does not pay off in terms of the
+ * overall size of the delta. For this reason, we break fragments only if
+ * the unchanged region is bigger than MATCH_THRESHOLD.
+ *
+ * The worst case for delta sizes occurs when we did not find any unchanged
+ * region in the page. The size of the delta will be the size of the page plus
+ * the size of the fragment header in that case.
+ *-------------------------------------------------------------------------
+ */
+/* Per-fragment header: region offset followed by region length */
+#define FRAGMENT_HEADER_SIZE	(2 * sizeof(OffsetNumber))
+/* Shortest run of unchanged bytes that is worth ending a fragment for */
+#define MATCH_THRESHOLD			FRAGMENT_HEADER_SIZE
+/*
+ * Upper bound on the size of one page's delta.  Parenthesized so that the
+ * macro expands as a single operand wherever it is used (for example as a
+ * factor in a multiplication).
+ */
+#define MAX_DELTA_SIZE			(BLCKSZ + FRAGMENT_HEADER_SIZE)
+
+/*
+ * Struct of generic xlog data for single page.
+ *
+ * One of these is kept per registered buffer.  GenericXLogRegister copies
+ * the buffer's page into 'image' and hands out a pointer to it.  For a
+ * relation that needs WAL, GenericXLogFinish swaps 'image' with the
+ * buffer's contents, so that 'image' then holds the previous page contents
+ * while the delta is computed into 'data'.
+ */
+typedef struct
+{
+ Buffer buffer; /* registered buffer; InvalidBuffer marks an unused slot */
+ char image[BLCKSZ]; /* copy of page image for modification */
+ char data[MAX_DELTA_SIZE]; /* delta between page images */
+ int dataLen; /* space consumed in data field */
+ bool fullImage; /* are we taking a full image of this page? */
+} PageData;
+
+/*
+ * State of generic xlog record construction.
+ *
+ * Allocated by GenericXLogStart and freed by GenericXLogFinish or
+ * GenericXLogAbort.
+ */
+struct GenericXLogState
+{
+ bool isLogged; /* RelationNeedsWAL() result for the relation */
+ PageData pages[MAX_GENERIC_XLOG_PAGES]; /* per-buffer slots; used slots come first */
+};
+
+/*
+ * Prototypes for local helpers.  Parameter names match the definitions
+ * further down in this file.
+ */
+static void writeFragment(PageData *pageData, OffsetNumber offset,
+						  OffsetNumber length, Pointer data);
+static void writeDelta(PageData *pageData);
+static void applyPageRedo(Page page, Pointer data, Size dataSize);
+
+/*
+ * Append one fragment to the delta being built in pageData->data.
+ *
+ * The fragment is stored as 'offset', then 'length', then 'length' bytes
+ * copied from 'data', starting at the current end of the delta
+ * (pageData->dataLen).  dataLen is advanced past the new fragment.
+ */
+static void
+writeFragment(PageData *pageData, OffsetNumber offset, OffsetNumber length,
+			  Pointer data)
+{
+	Pointer		dst = pageData->data + pageData->dataLen;
+	Size		fragmentSize = sizeof(offset) + sizeof(length) + length;
+
+	/* The delta buffer must have room for header plus payload */
+	Assert(pageData->dataLen + fragmentSize <= sizeof(pageData->data));
+
+	/* Header first, payload right behind it */
+	memcpy(dst, &offset, sizeof(offset));
+	memcpy(dst + sizeof(offset), &length, sizeof(length));
+	memcpy(dst + sizeof(offset) + sizeof(length), data, length);
+
+	pageData->dataLen += fragmentSize;
+}
+
+/*
+ * Make delta for given page.
+ *
+ * Compares the buffer's current contents ('page') against pageData->image
+ * ('image') byte by byte and appends, via writeFragment, the regions of
+ * 'page' that differ.  GenericXLogFinish calls this after swapping the two
+ * copies, so at that point the buffer holds the modified page and 'image'
+ * the previous contents.  Output accumulates in pageData->data and
+ * pageData->dataLen.
+ */
+static void
+writeDelta(PageData *pageData)
+{
+ Page page = BufferGetPage(pageData->buffer),
+ image = (Page) pageData->image;
+ int i,
+ fragmentBegin = -1, /* first byte of the open fragment, -1 if none */
+ fragmentEnd = -1; /* first matched byte after the last mismatch, -1 if none */
+ uint16 pageLower = ((PageHeader) page)->pd_lower,
+ pageUpper = ((PageHeader) page)->pd_upper,
+ imageLower = ((PageHeader) image)->pd_lower,
+ imageUpper = ((PageHeader) image)->pd_upper;
+
+ for (i = 0; i < BLCKSZ; i++)
+ {
+ bool match;
+
+ /*
+ * Check if bytes in old and new page images match. We do not care
+ * about data in the unallocated area between pd_lower and pd_upper.
+ * We assume the unallocated area to expand with unmatched bytes.
+ * Bytes inside the unallocated area are assumed to always match.
+ *
+ * Concretely: below pageLower (or at/after pageUpper) the bytes are
+ * compared only if the position is also below imageLower (or
+ * at/after imageUpper); otherwise the byte counts as changed.
+ * Positions from pageLower up to pageUpper always count as matched.
+ */
+ if (i < pageLower)
+ {
+ if (i < imageLower)
+ match = (page[i] == image[i]);
+ else
+ match = false;
+ }
+ else if (i >= pageUpper)
+ {
+ if (i >= imageUpper)
+ match = (page[i] == image[i]);
+ else
+ match = false;
+ }
+ else
+ {
+ match = true;
+ }
+
+ if (match)
+ {
+ if (fragmentBegin >= 0)
+ {
+ /*
+ * Matched byte is potentially part of a fragment.  Remember
+ * where the current run of matched bytes started.
+ */
+ if (fragmentEnd < 0)
+ fragmentEnd = i;
+
+ /*
+ * Write next fragment if sequence of matched bytes is longer
+ * than MATCH_THRESHOLD.  The fragment stops at fragmentEnd,
+ * so the run of matched bytes is left out of it.
+ */
+ if (i - fragmentEnd >= MATCH_THRESHOLD)
+ {
+ writeFragment(pageData, fragmentBegin,
+ fragmentEnd - fragmentBegin,
+ page + fragmentBegin);
+ fragmentBegin = -1;
+ fragmentEnd = -1;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * On unmatched byte, start new fragment if it is not done yet.
+ * Any run of matched bytes seen so far stays inside the fragment.
+ */
+ if (fragmentBegin < 0)
+ fragmentBegin = i;
+ fragmentEnd = -1;
+ }
+ }
+
+ if (fragmentBegin >= 0) /* fragment still open at end of page: flush it through BLCKSZ */
+ writeFragment(pageData, fragmentBegin,
+ BLCKSZ - fragmentBegin,
+ page + fragmentBegin);
+
+#ifdef WAL_DEBUG
+ /*
+ * If xlog debug is enabled, then check produced delta. Result of delta
+ * application to saved image should be the same as current page state.
+ * Only the bytes below pageLower and from pageUpper onwards are compared.
+ */
+ if (XLOG_DEBUG)
+ {
+ char tmp[BLCKSZ];
+ memcpy(tmp, image, BLCKSZ);
+ applyPageRedo(tmp, pageData->data, pageData->dataLen);
+ if (memcmp(tmp, page, pageLower)
+ || memcmp(tmp + pageUpper, page + pageUpper, BLCKSZ - pageUpper))
+ elog(ERROR, "result of generic xlog apply does not match");
+ }
+#endif
+}
+
+/*
+ * Begin construction of a generic xlog record for 'relation'.
+ *
+ * Returns a freshly palloc'd state in which no buffer is registered yet.
+ * Whether the relation needs WAL is recorded in the state so that
+ * GenericXLogFinish can pick the right path.
+ */
+GenericXLogState *
+GenericXLogStart(Relation relation)
+{
+	GenericXLogState *state;
+	PageData   *slot;
+
+	state = (GenericXLogState *) palloc(sizeof(GenericXLogState));
+	state->isLogged = RelationNeedsWAL(relation);
+
+	/* Every slot starts out unused */
+	for (slot = state->pages;
+		 slot < state->pages + MAX_GENERIC_XLOG_PAGES;
+		 slot++)
+		slot->buffer = InvalidBuffer;
+
+	return state;
+}
+
+/*
+ * Register 'buffer' with the generic xlog record under construction.
+ *
+ * Returns a pointer to the private copy of the page kept in 'state'.  On
+ * first registration the buffer's current page is copied into that image
+ * and 'isNew' is stored as the slot's full-image flag.  Registering the
+ * same buffer again just returns the existing image, and 'isNew' is ignored.
+ *
+ * Raises ERROR when all MAX_GENERIC_XLOG_PAGES slots are already in use by
+ * other buffers.
+ */
+Page
+GenericXLogRegister(GenericXLogState *state, Buffer buffer, bool isNew)
+{
+	int			slotno;
+
+	for (slotno = 0; slotno < MAX_GENERIC_XLOG_PAGES; slotno++)
+	{
+		PageData   *slot = &state->pages[slotno];
+
+		if (!BufferIsInvalid(slot->buffer))
+		{
+			/* Slot is taken: keep looking unless it is this very buffer */
+			if (slot->buffer != buffer)
+				continue;
+
+			/* Already registered, the image is prepared */
+			return (Page) slot->image;
+		}
+
+		/* First unused slot: claim it and snapshot the page */
+		slot->buffer = buffer;
+		memcpy(slot->image, BufferGetPage(buffer), BLCKSZ);
+		slot->dataLen = 0;
+		slot->fullImage = isNew;
+		return (Page) slot->image;
+	}
+
+	elog(ERROR, "maximum number of %d generic xlog buffers is exceeded",
+		 MAX_GENERIC_XLOG_PAGES);
+
+	/* keep compiler quiet */
+	return NULL;
+}
+
+/*
+ * Remove 'buffer' from the set registered in 'state'.
+ *
+ * The slots behind the removed one are shifted down by one position so the
+ * remaining pages keep their relative order, and the last slot is marked
+ * unused.  Raises ERROR if the buffer is not registered.
+ */
+void
+GenericXLogUnregister(GenericXLogState *state, Buffer buffer)
+{
+	PageData   *last = state->pages + (MAX_GENERIC_XLOG_PAGES - 1);
+	PageData   *slot;
+
+	for (slot = state->pages; slot <= last; slot++)
+	{
+		if (slot->buffer != buffer)
+			continue;
+
+		/*
+		 * Close the gap rather than just clearing the slot: the order of
+		 * pages in the array could matter for concurrency.
+		 */
+		memmove(slot, slot + 1, (last - slot) * sizeof(PageData));
+		last->buffer = InvalidBuffer;
+		return;
+	}
+
+	elog(ERROR, "registered generic xlog buffer not found");
+}
+
+/*
+ * Put all changes in registered buffers to generic xlog record.
+ *
+ * For a relation that needs WAL: each registered page's modified image is
+ * swapped into its buffer, the page is registered either as a forced full
+ * image or together with a delta produced by writeDelta, one RM_GENERIC_ID
+ * record is inserted, and every registered buffer gets the record's LSN and
+ * is marked dirty.  Otherwise the modified images are simply copied into
+ * the buffers, which are marked dirty.
+ *
+ * Frees 'state' in both cases.  Returns the value of XLogInsert, or
+ * InvalidXLogRecPtr when no record was written.
+ */
+XLogRecPtr
+GenericXLogFinish(GenericXLogState *state)
+{
+ XLogRecPtr lsn = InvalidXLogRecPtr;
+ int i;
+
+ if (state->isLogged)
+ {
+ /* Logged relation: make xlog record in critical section. */
+ XLogBeginInsert();
+
+ START_CRIT_SECTION();
+
+ for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
+ {
+ char tmp[BLCKSZ];
+ PageData *page = &state->pages[i];
+
+ if (BufferIsInvalid(page->buffer))
+ continue;
+
+ /*
+ * Swap current and saved page image.  Afterwards the buffer holds
+ * the modified page and page->image the previous contents, which
+ * is what writeDelta compares against.
+ */
+ memcpy(tmp, page->image, BLCKSZ);
+ memcpy(page->image, BufferGetPage(page->buffer), BLCKSZ);
+ memcpy(BufferGetPage(page->buffer), tmp, BLCKSZ);
+
+ if (page->fullImage)
+ {
+ /* A full page image does not require anything special */
+ XLogRegisterBuffer(i, page->buffer, REGBUF_FORCE_IMAGE);
+ }
+ else
+ {
+ /*
+ * In normal mode, calculate delta and write it as data
+ * associated with this page.  The delta lives in 'state', which
+ * is not freed until after XLogInsert below.
+ */
+ XLogRegisterBuffer(i, page->buffer, REGBUF_STANDARD);
+ writeDelta(page);
+ XLogRegisterBufData(i, page->data, page->dataLen);
+ }
+ }
+
+ /* Insert xlog record */
+ lsn = XLogInsert(RM_GENERIC_ID, 0);
+
+ /* Set LSN and mark buffers dirty */
+ for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
+ {
+ PageData *page = &state->pages[i];
+
+ if (BufferIsInvalid(page->buffer))
+ continue;
+ PageSetLSN(BufferGetPage(page->buffer), lsn);
+ MarkBufferDirty(page->buffer);
+ }
+ END_CRIT_SECTION();
+ }
+ else
+ {
+ /*
+ * Unlogged relation: skip xlog-related stuff.  The modified images
+ * are copied into the buffers and no LSN is set.
+ */
+ START_CRIT_SECTION();
+ for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
+ {
+ PageData *page = &state->pages[i];
+
+ if (BufferIsInvalid(page->buffer))
+ continue;
+ memcpy(BufferGetPage(page->buffer), page->image, BLCKSZ);
+ MarkBufferDirty(page->buffer);
+ }
+ END_CRIT_SECTION();
+ }
+
+ pfree(state);
+
+ return lsn;
+}
+
+/*
+ * Abort generic xlog record.
+ *
+ * Only frees 'state'.  The registered buffers are not touched here, so any
+ * changes made to the page images held in 'state' are dropped with it.
+ */
+void
+GenericXLogAbort(GenericXLogState *state)
+{
+ pfree(state);
+}
+
+/*
+ * Replay a delta onto 'page'.
+ *
+ * 'data' holds 'dataSize' bytes of back-to-back fragments.  Each fragment
+ * is a header of two OffsetNumbers (offset, then length) followed by
+ * 'length' bytes, which are copied to page + offset.  No bounds checking is
+ * done on the offsets or lengths found in the delta.
+ */
+static void
+applyPageRedo(Page page, Pointer data, Size dataSize)
+{
+	Pointer		cursor = data;
+	Pointer		limit = data + dataSize;
+
+	while (cursor < limit)
+	{
+		OffsetNumber header[2];	/* [0] = offset, [1] = length */
+
+		/* Pull out the fragment header in one go */
+		memcpy(header, cursor, sizeof(header));
+		cursor += sizeof(header);
+
+		/* Overwrite the page region with the fragment payload */
+		memcpy(page + header[0], cursor, header[1]);
+		cursor += header[1];
+	}
+}
+
+/*
+ * Replay a generic xlog record.
+ *
+ * Every block referenced by the record is read with XLogReadBufferForRedo.
+ * Where that reports BLK_NEEDS_REDO, the block's delta is applied to the
+ * page, the page LSN is set to the record's end position and the buffer is
+ * marked dirty.  Buffers are released only after all blocks have been
+ * processed.
+ */
+void
+generic_redo(XLogReaderState *record)
+{
+	Buffer		buffers[MAX_GENERIC_XLOG_PAGES] = {InvalidBuffer};
+	XLogRecPtr	lsn = record->EndRecPtr;
+	uint8		blkno;
+
+	Assert(record->max_block_id < MAX_GENERIC_XLOG_PAGES);
+
+	/* First pass: bring each referenced block up to date */
+	for (blkno = 0; blkno <= record->max_block_id; blkno++)
+	{
+		Page		page;
+		Pointer		delta;
+		Size		deltaSize;
+
+		if (!XLogRecHasBlockRef(record, blkno))
+			continue;
+
+		/* Nothing more to do for this block unless redo is needed */
+		if (XLogReadBufferForRedo(record, blkno, &buffers[blkno]) != BLK_NEEDS_REDO)
+			continue;
+
+		page = BufferGetPage(buffers[blkno]);
+		delta = XLogRecGetBlockData(record, blkno, &deltaSize);
+		applyPageRedo(page, delta, deltaSize);
+
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffers[blkno]);
+	}
+
+	/* Second pass: changes are done, unlock and release all buffers */
+	for (blkno = 0; blkno <= record->max_block_id; blkno++)
+	{
+		if (!BufferIsValid(buffers[blkno]))
+			continue;
+
+		UnlockReleaseBuffer(buffers[blkno]);
+	}
+}
diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c
index 7c4d773ce0f..7b38c16f521 100644
--- a/src/backend/access/transam/rmgr.c
+++ b/src/backend/access/transam/rmgr.c
@@ -11,6 +11,7 @@
#include "access/commit_ts.h"
#include "access/gin.h"
#include "access/gist_private.h"
+#include "access/generic_xlog.h"
#include "access/hash.h"
#include "access/heapam_xlog.h"
#include "access/brin_xlog.h"