aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2010-12-29 06:48:53 -0500
committerRobert Haas <rhaas@postgresql.org>2010-12-29 06:48:53 -0500
commit53dbc27c62d8e1b6c5253feba04a5094cb8fe046 (patch)
treeb27563b69fa73dc4b7dc873bfc653bedc6ba1e05 /src/backend/access
parent9b8aff8c192e2f313f90395d114c58a9ef84f97f (diff)
downloadpostgresql-53dbc27c62d8e1b6c5253feba04a5094cb8fe046.tar.gz
postgresql-53dbc27c62d8e1b6c5253feba04a5094cb8fe046.zip
Support unlogged tables.
The contents of an unlogged table are not WAL-logged; thus, they are not available on standby servers and are truncated whenever the database system enters recovery. Indexes on unlogged tables are also unlogged. Unlogged GiST indexes are not currently supported.
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/gin/gininsert.c42
-rw-r--r--src/backend/access/gist/gist.c13
-rw-r--r--src/backend/access/hash/hash.c15
-rw-r--r--src/backend/access/hash/hashovfl.c9
-rw-r--r--src/backend/access/hash/hashpage.c20
-rw-r--r--src/backend/access/nbtree/nbtree.c31
-rw-r--r--src/backend/access/transam/xlog.c17
7 files changed, 132 insertions, 15 deletions
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 8681edefe67..d66c79cb8de 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -19,6 +19,7 @@
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
+#include "storage/smgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h"
@@ -412,6 +413,47 @@ ginbuild(PG_FUNCTION_ARGS)
}
/*
+ * ginbuildempty() -- build an empty gin index in the initialization fork
+ */
+Datum
+ginbuildempty(PG_FUNCTION_ARGS)
+{
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+ Buffer RootBuffer,
+ MetaBuffer;
+
+ /* An empty GIN index has two pages. */
+ MetaBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
+ RootBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* Initialize both pages and mark them dirty. */
+ START_CRIT_SECTION();
+ GinInitMetabuffer(MetaBuffer);
+ MarkBufferDirty(MetaBuffer);
+ GinInitBuffer(RootBuffer, GIN_LEAF);
+ MarkBufferDirty(RootBuffer);
+
+ /* XLOG the new pages */
+ log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
+ BufferGetBlockNumber(MetaBuffer),
+ BufferGetPage(MetaBuffer));
+ log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
+ BufferGetBlockNumber(RootBuffer),
+ BufferGetPage(RootBuffer));
+ END_CRIT_SECTION();
+
+ /* Unlock and release the buffers. */
+ UnlockReleaseBuffer(MetaBuffer);
+ UnlockReleaseBuffer(RootBuffer);
+
+ PG_RETURN_VOID();
+}
+
+/*
* Inserts value during normal insertion
*/
static uint32
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 7cd144e2f09..c26ac74332d 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -219,6 +219,19 @@ gistbuildCallback(Relation index,
}
/*
+ * gistbuildempty() -- build an empty gist index in the initialization fork
+ */
+Datum
+gistbuildempty(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unlogged GIST indexes are not supported")));
+
+ PG_RETURN_VOID();
+}
+
+/*
* gistinsert -- wrapper for GiST tuple insertion.
*
* This is the public interface routine for tuple insertion in GiSTs.
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index e53ec3d5eaa..4df92d44c03 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -69,7 +69,7 @@ hashbuild(PG_FUNCTION_ARGS)
estimate_rel_size(heap, NULL, &relpages, &reltuples);
/* Initialize the hash index metadata page and initial buckets */
- num_buckets = _hash_metapinit(index, reltuples);
+ num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);
/*
* If we just insert the tuples into the index in scan order, then
@@ -114,6 +114,19 @@ hashbuild(PG_FUNCTION_ARGS)
}
/*
+ * hashbuildempty() -- build an empty hash index in the initialization fork
+ */
+Datum
+hashbuildempty(PG_FUNCTION_ARGS)
+{
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+
+ _hash_metapinit(index, 0, INIT_FORKNUM);
+
+ PG_RETURN_VOID();
+}
+
+/*
* Per-tuple callback from IndexBuildHeapScan
*/
static void
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 7c6e902ea93..454ad6c7a8a 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -259,7 +259,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
* convenient to pre-mark them as "in use" too.
*/
bit = metap->hashm_spares[splitnum];
- _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
+ _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
metap->hashm_spares[splitnum]++;
}
else
@@ -280,7 +280,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
* with metapage write lock held; would be better to use a lock that
* doesn't block incoming searches.
*/
- newbuf = _hash_getnewbuf(rel, blkno);
+ newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
metap->hashm_spares[splitnum]++;
@@ -503,7 +503,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf,
* All bits in the new bitmap page are set to "1", indicating "in use".
*/
void
-_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
+_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
+ ForkNumber forkNum)
{
Buffer buf;
Page pg;
@@ -520,7 +521,7 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
* page while holding the metapage lock, but this path is taken so seldom
* that it's not worth worrying about.
*/
- buf = _hash_getnewbuf(rel, blkno);
+ buf = _hash_getnewbuf(rel, blkno, forkNum);
pg = BufferGetPage(buf);
/* initialize the page's special space */
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 2ebeda98b59..29f7b25b4ec 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -183,9 +183,9 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
* extend the index at a time.
*/
Buffer
-_hash_getnewbuf(Relation rel, BlockNumber blkno)
+_hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
{
- BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
+ BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, forkNum);
Buffer buf;
if (blkno == P_NEW)
@@ -197,13 +197,13 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
/* smgr insists we use P_NEW to extend the relation */
if (blkno == nblocks)
{
- buf = ReadBuffer(rel, P_NEW);
+ buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
if (BufferGetBlockNumber(buf) != blkno)
elog(ERROR, "unexpected hash relation size: %u, should be %u",
BufferGetBlockNumber(buf), blkno);
}
else
- buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
+ buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
LockBuffer(buf, HASH_WRITE);
@@ -324,7 +324,7 @@ _hash_chgbufaccess(Relation rel,
* multiple buffer locks is ignored.
*/
uint32
-_hash_metapinit(Relation rel, double num_tuples)
+_hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
{
HashMetaPage metap;
HashPageOpaque pageopaque;
@@ -340,7 +340,7 @@ _hash_metapinit(Relation rel, double num_tuples)
uint32 i;
/* safety check */
- if (RelationGetNumberOfBlocks(rel) != 0)
+ if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
RelationGetRelationName(rel));
@@ -383,7 +383,7 @@ _hash_metapinit(Relation rel, double num_tuples)
* calls to occur. This ensures that the smgr level has the right idea of
* the physical index length.
*/
- metabuf = _hash_getnewbuf(rel, HASH_METAPAGE);
+ metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
pg = BufferGetPage(metabuf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
@@ -451,7 +451,7 @@ _hash_metapinit(Relation rel, double num_tuples)
/* Allow interrupts, in case N is huge */
CHECK_FOR_INTERRUPTS();
- buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
+ buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
pg = BufferGetPage(buf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
pageopaque->hasho_prevblkno = InvalidBlockNumber;
@@ -468,7 +468,7 @@ _hash_metapinit(Relation rel, double num_tuples)
/*
* Initialize first bitmap page
*/
- _hash_initbitmap(rel, metap, num_buckets + 1);
+ _hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
/* all done */
_hash_wrtbuf(rel, metabuf);
@@ -785,7 +785,7 @@ _hash_splitbucket(Relation rel,
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
nblkno = start_nblkno;
- nbuf = _hash_getnewbuf(rel, nblkno);
+ nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
npage = BufferGetPage(nbuf);
/* initialize the new bucket's primary page */
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 655a40090e9..a13d629b0ef 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -29,6 +29,7 @@
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
+#include "storage/smgr.h"
#include "utils/memutils.h"
@@ -205,6 +206,36 @@ btbuildCallback(Relation index,
}
/*
+ * btbuildempty() -- build an empty btree index in the initialization fork
+ */
+Datum
+btbuildempty(PG_FUNCTION_ARGS)
+{
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+ Page metapage;
+
+ /* Construct metapage. */
+ metapage = (Page) palloc(BLCKSZ);
+ _bt_initmetapage(metapage, P_NONE, 0);
+
+ /* Write the page. If archiving/streaming, XLOG it. */
+ smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
+ (char *) metapage, true);
+ if (XLogIsNeeded())
+ log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
+ BTREE_METAPAGE, metapage);
+
+ /*
+ * An immediate sync is required even if we xlog'd the page, because the
+ * write did not go through shared_buffers and therefore a concurrent
+ * checkpoint may have moved the redo pointer past our xlog record.
+ */
+ smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+
+ PG_RETURN_VOID();
+}
+
+/*
* btinsert() -- insert an index tuple into a btree.
*
* Descend the tree recursively, find the appropriate location for our
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bf62138bf86..1ec6f2f15ac 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -49,6 +49,7 @@
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/procarray.h"
+#include "storage/reinit.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/builtins.h"
@@ -5961,6 +5962,14 @@ StartupXLOG(void)
CheckRequiredParameterValues();
/*
+ * We're in recovery, so unlogged relations may be trashed
+ * and must be reset. This should be done BEFORE allowing Hot
+ * Standby connections, so that read-only backends don't try to
+ * read whatever garbage is left over from before.
+ */
+ ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP);
+
+ /*
* Initialize for Hot Standby, if enabled. We won't let backends in
* yet, not until we've reached the min recovery point specified in
* control file and we've established a recovery snapshot from a
@@ -6414,6 +6423,14 @@ StartupXLOG(void)
PreallocXlogFiles(EndOfLog);
/*
+ * Reset initial contents of unlogged relations. This has to be done
+ * AFTER recovery is complete so that any unlogged relations created
+ * during recovery also get picked up.
+ */
+ if (InRecovery)
+ ResetUnloggedRelations(UNLOGGED_RELATION_INIT);
+
+ /*
* Okay, we're officially UP.
*/
InRecovery = false;