aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2010-12-29 06:48:53 -0500
committerRobert Haas <rhaas@postgresql.org>2010-12-29 06:48:53 -0500
commit53dbc27c62d8e1b6c5253feba04a5094cb8fe046 (patch)
treeb27563b69fa73dc4b7dc873bfc653bedc6ba1e05 /src/backend
parent9b8aff8c192e2f313f90395d114c58a9ef84f97f (diff)
downloadpostgresql-53dbc27c62d8e1b6c5253feba04a5094cb8fe046.tar.gz
postgresql-53dbc27c62d8e1b6c5253feba04a5094cb8fe046.zip
Support unlogged tables.
The contents of an unlogged table are not WAL-logged; thus, they are not available on standby servers and are truncated whenever the database system enters recovery. Indexes on unlogged tables are also unlogged. Unlogged GiST indexes are not currently supported.
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/gin/gininsert.c42
-rw-r--r--src/backend/access/gist/gist.c13
-rw-r--r--src/backend/access/hash/hash.c15
-rw-r--r--src/backend/access/hash/hashovfl.c9
-rw-r--r--src/backend/access/hash/hashpage.c20
-rw-r--r--src/backend/access/nbtree/nbtree.c31
-rw-r--r--src/backend/access/transam/xlog.c17
-rw-r--r--src/backend/catalog/catalog.c14
-rw-r--r--src/backend/catalog/heap.c19
-rw-r--r--src/backend/catalog/index.c11
-rw-r--r--src/backend/catalog/storage.c49
-rw-r--r--src/backend/commands/tablecmds.c19
-rw-r--r--src/backend/parser/gram.y11
-rw-r--r--src/backend/storage/buffer/bufmgr.c53
-rw-r--r--src/backend/storage/file/Makefile2
-rw-r--r--src/backend/storage/file/copydir.c3
-rw-r--r--src/backend/storage/file/fd.c2
-rw-r--r--src/backend/storage/file/reinit.c396
-rw-r--r--src/backend/utils/adt/dbsize.c1
-rw-r--r--src/backend/utils/cache/relcache.c2
20 files changed, 662 insertions, 67 deletions
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 8681edefe67..d66c79cb8de 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -19,6 +19,7 @@
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
+#include "storage/smgr.h"
#include "storage/indexfsm.h"
#include "utils/memutils.h"
@@ -412,6 +413,47 @@ ginbuild(PG_FUNCTION_ARGS)
}
/*
+ * ginbuildempty() -- build an empty gin index in the initialization fork
+ */
+Datum
+ginbuildempty(PG_FUNCTION_ARGS)
+{
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+ Buffer RootBuffer,
+ MetaBuffer;
+
+ /* An empty GIN index has two pages: a metapage and a root (leaf) page. */
+ MetaBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
+ RootBuffer =
+ ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* Initialize both pages, mark them dirty, and xlog them. */
+ START_CRIT_SECTION();
+ GinInitMetabuffer(MetaBuffer);
+ MarkBufferDirty(MetaBuffer);
+ GinInitBuffer(RootBuffer, GIN_LEAF);
+ MarkBufferDirty(RootBuffer);
+
+ /* XLOG the new pages (both belong to the init fork) */
+ log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
+ BufferGetBlockNumber(MetaBuffer),
+ BufferGetPage(MetaBuffer));
+ log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
+ BufferGetBlockNumber(RootBuffer),
+ BufferGetPage(RootBuffer));
+ END_CRIT_SECTION();
+
+ /* Unlock and release the buffers, now that the critical section is over. */
+ UnlockReleaseBuffer(MetaBuffer);
+ UnlockReleaseBuffer(RootBuffer);
+
+ PG_RETURN_VOID();
+}
+
+/*
* Inserts value during normal insertion
*/
static uint32
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 7cd144e2f09..c26ac74332d 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -219,6 +219,19 @@ gistbuildCallback(Relation index,
}
/*
+ * gistbuildempty() -- build an empty gist index in the initialization fork
+ */
+Datum
+gistbuildempty(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unlogged GIST indexes are not supported")));
+
+ PG_RETURN_VOID(); /* presumably not reached: ereport(ERROR) is expected not to return */
+}
+
+/*
* gistinsert -- wrapper for GiST tuple insertion.
*
* This is the public interface routine for tuple insertion in GiSTs.
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index e53ec3d5eaa..4df92d44c03 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -69,7 +69,7 @@ hashbuild(PG_FUNCTION_ARGS)
estimate_rel_size(heap, NULL, &relpages, &reltuples);
/* Initialize the hash index metadata page and initial buckets */
- num_buckets = _hash_metapinit(index, reltuples);
+ num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);
/*
* If we just insert the tuples into the index in scan order, then
@@ -114,6 +114,19 @@ hashbuild(PG_FUNCTION_ARGS)
}
/*
+ * hashbuildempty() -- build an empty hash index in the initialization fork
+ */
+Datum
+hashbuildempty(PG_FUNCTION_ARGS)
+{
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+
+ _hash_metapinit(index, 0, INIT_FORKNUM); /* zero tuples, init fork; returned bucket count is unused */
+
+ PG_RETURN_VOID();
+}
+
+/*
* Per-tuple callback from IndexBuildHeapScan
*/
static void
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index 7c6e902ea93..454ad6c7a8a 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -259,7 +259,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
* convenient to pre-mark them as "in use" too.
*/
bit = metap->hashm_spares[splitnum];
- _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
+ _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
metap->hashm_spares[splitnum]++;
}
else
@@ -280,7 +280,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
* with metapage write lock held; would be better to use a lock that
* doesn't block incoming searches.
*/
- newbuf = _hash_getnewbuf(rel, blkno);
+ newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
metap->hashm_spares[splitnum]++;
@@ -503,7 +503,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf,
* All bits in the new bitmap page are set to "1", indicating "in use".
*/
void
-_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
+_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
+ ForkNumber forkNum)
{
Buffer buf;
Page pg;
@@ -520,7 +521,7 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
* page while holding the metapage lock, but this path is taken so seldom
* that it's not worth worrying about.
*/
- buf = _hash_getnewbuf(rel, blkno);
+ buf = _hash_getnewbuf(rel, blkno, forkNum);
pg = BufferGetPage(buf);
/* initialize the page's special space */
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 2ebeda98b59..29f7b25b4ec 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -183,9 +183,9 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
* extend the index at a time.
*/
Buffer
-_hash_getnewbuf(Relation rel, BlockNumber blkno)
+_hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
{
- BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
+ BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, forkNum);
Buffer buf;
if (blkno == P_NEW)
@@ -197,13 +197,13 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
/* smgr insists we use P_NEW to extend the relation */
if (blkno == nblocks)
{
- buf = ReadBuffer(rel, P_NEW);
+ buf = ReadBufferExtended(rel, forkNum, P_NEW, RBM_NORMAL, NULL);
if (BufferGetBlockNumber(buf) != blkno)
elog(ERROR, "unexpected hash relation size: %u, should be %u",
BufferGetBlockNumber(buf), blkno);
}
else
- buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
+ buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
LockBuffer(buf, HASH_WRITE);
@@ -324,7 +324,7 @@ _hash_chgbufaccess(Relation rel,
* multiple buffer locks is ignored.
*/
uint32
-_hash_metapinit(Relation rel, double num_tuples)
+_hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
{
HashMetaPage metap;
HashPageOpaque pageopaque;
@@ -340,7 +340,7 @@ _hash_metapinit(Relation rel, double num_tuples)
uint32 i;
/* safety check */
- if (RelationGetNumberOfBlocks(rel) != 0)
+ if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
RelationGetRelationName(rel));
@@ -383,7 +383,7 @@ _hash_metapinit(Relation rel, double num_tuples)
* calls to occur. This ensures that the smgr level has the right idea of
* the physical index length.
*/
- metabuf = _hash_getnewbuf(rel, HASH_METAPAGE);
+ metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
pg = BufferGetPage(metabuf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
@@ -451,7 +451,7 @@ _hash_metapinit(Relation rel, double num_tuples)
/* Allow interrupts, in case N is huge */
CHECK_FOR_INTERRUPTS();
- buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
+ buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
pg = BufferGetPage(buf);
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
pageopaque->hasho_prevblkno = InvalidBlockNumber;
@@ -468,7 +468,7 @@ _hash_metapinit(Relation rel, double num_tuples)
/*
* Initialize first bitmap page
*/
- _hash_initbitmap(rel, metap, num_buckets + 1);
+ _hash_initbitmap(rel, metap, num_buckets + 1, forkNum);
/* all done */
_hash_wrtbuf(rel, metabuf);
@@ -785,7 +785,7 @@ _hash_splitbucket(Relation rel,
oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
nblkno = start_nblkno;
- nbuf = _hash_getnewbuf(rel, nblkno);
+ nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
npage = BufferGetPage(nbuf);
/* initialize the new bucket's primary page */
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 655a40090e9..a13d629b0ef 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -29,6 +29,7 @@
#include "storage/indexfsm.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
+#include "storage/smgr.h"
#include "utils/memutils.h"
@@ -205,6 +206,36 @@ btbuildCallback(Relation index,
}
/*
+ * btbuildempty() -- build an empty btree index in the initialization fork
+ */
+Datum
+btbuildempty(PG_FUNCTION_ARGS)
+{
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+ Page metapage;
+
+ /* Construct metapage in local memory (one BLCKSZ-sized page). */
+ metapage = (Page) palloc(BLCKSZ);
+ _bt_initmetapage(metapage, P_NONE, 0);
+
+ /* Write the page to the init fork. If archiving/streaming, XLOG it. */
+ smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
+ (char *) metapage, true);
+ if (XLogIsNeeded())
+ log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
+ BTREE_METAPAGE, metapage);
+
+ /*
+ * An immediate sync is required even if we xlog'd the page, because the
+ * write did not go through shared_buffers and therefore a concurrent
+ * checkpoint may have moved the redo pointer past our xlog record.
+ */
+ smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+
+ PG_RETURN_VOID();
+}
+
+/*
* btinsert() -- insert an index tuple into a btree.
*
* Descend the tree recursively, find the appropriate location for our
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bf62138bf86..1ec6f2f15ac 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -49,6 +49,7 @@
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/procarray.h"
+#include "storage/reinit.h"
#include "storage/smgr.h"
#include "storage/spin.h"
#include "utils/builtins.h"
@@ -5961,6 +5962,14 @@ StartupXLOG(void)
CheckRequiredParameterValues();
/*
+ * We're in recovery, so unlogged relations may be trashed
+ * and must be reset. This should be done BEFORE allowing Hot
+ * Standby connections, so that read-only backends don't try to
+ * read whatever garbage is left over from before.
+ */
+ ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP);
+
+ /*
* Initialize for Hot Standby, if enabled. We won't let backends in
* yet, not until we've reached the min recovery point specified in
* control file and we've established a recovery snapshot from a
@@ -6414,6 +6423,14 @@ StartupXLOG(void)
PreallocXlogFiles(EndOfLog);
/*
+ * Reset initial contents of unlogged relations. This has to be done
+ * AFTER recovery is complete so that any unlogged relations created
+ * during recovery also get picked up.
+ */
+ if (InRecovery)
+ ResetUnloggedRelations(UNLOGGED_RELATION_INIT);
+
+ /*
* Okay, we're officially UP.
*/
InRecovery = false;
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index 88b5c2a215d..fc5a8fcd655 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -55,7 +55,8 @@
const char *forkNames[] = {
"main", /* MAIN_FORKNUM */
"fsm", /* FSM_FORKNUM */
- "vm" /* VISIBILITYMAP_FORKNUM */
+ "vm", /* VISIBILITYMAP_FORKNUM */
+ "init" /* INIT_FORKNUM */
};
/*
@@ -82,14 +83,14 @@ forkname_to_number(char *forkName)
* We use this to figure out whether a filename could be a relation
* fork (as opposed to an oddly named stray file that somehow ended
* up in the database directory). If the passed string begins with
- * a fork name (other than the main fork name), we return its length.
- * If not, we return 0.
+ * a fork name (other than the main fork name), we return its length,
+ * and set *fork (if not NULL) to the fork number. If not, we return 0.
*
* Note that the present coding assumes that there are no fork names which
* are prefixes of other fork names.
*/
int
-forkname_chars(const char *str)
+forkname_chars(const char *str, ForkNumber *fork)
{
ForkNumber forkNum;
@@ -97,7 +98,11 @@ forkname_chars(const char *str)
{
int len = strlen(forkNames[forkNum]);
if (strncmp(forkNames[forkNum], str, len) == 0)
+ {
+ if (fork)
+ *fork = forkNum;
return len;
+ }
}
return 0;
}
@@ -537,6 +542,7 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class, char relpersistence)
case RELPERSISTENCE_TEMP:
backend = MyBackendId;
break;
+ case RELPERSISTENCE_UNLOGGED:
case RELPERSISTENCE_PERMANENT:
backend = InvalidBackendId;
break;
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index bcf6caa2eef..8027d740f6d 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1211,6 +1211,25 @@ heap_create_with_catalog(const char *relname,
register_on_commit_action(relid, oncommit);
/*
+ * If this is an unlogged relation, it needs an init fork so that it
+ * can be correctly reinitialized on restart. Since we're going to
+ * do an immediate sync, we only need to xlog this if archiving or
+ * streaming is enabled. And the immediate sync is required, because
+ * otherwise there's no guarantee that this will hit the disk before
+ * the next checkpoint moves the redo pointer.
+ */
+ if (relpersistence == RELPERSISTENCE_UNLOGGED)
+ {
+ Assert(relkind == RELKIND_RELATION || relkind == RELKIND_TOASTVALUE);
+
+ smgrcreate(new_rel_desc->rd_smgr, INIT_FORKNUM, false);
+ if (XLogIsNeeded())
+ log_smgrcreate(&new_rel_desc->rd_smgr->smgr_rnode.node,
+ INIT_FORKNUM);
+ smgrimmedsync(new_rel_desc->rd_smgr, INIT_FORKNUM);
+ }
+
+ /*
* ok, the relation has been cataloged, so close our relations and return
* the OID of the newly created relation.
*/
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 8fbe8ebc91d..e50a084f003 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1438,6 +1438,17 @@ index_build(Relation heapRelation,
Assert(PointerIsValid(stats));
/*
+ * If this is an unlogged index, we need to write out an init fork for it.
+ */
+ if (heapRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
+ {
+ RegProcedure ambuildempty = indexRelation->rd_am->ambuildempty;
+ RelationOpenSmgr(indexRelation);
+ smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
+ OidFunctionCall1(ambuildempty, PointerGetDatum(indexRelation));
+ }
+
+ /*
* If it's for an exclusion constraint, make a second pass over the heap
* to verify that the constraint is satisfied.
*/
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 671aaff133a..0bd0451f008 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -74,6 +74,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
typedef struct xl_smgr_create
{
RelFileNode rnode;
+ ForkNumber forkNum;
} xl_smgr_create;
typedef struct xl_smgr_truncate
@@ -98,9 +99,6 @@ void
RelationCreateStorage(RelFileNode rnode, char relpersistence)
{
PendingRelDelete *pending;
- XLogRecPtr lsn;
- XLogRecData rdata;
- xl_smgr_create xlrec;
SMgrRelation srel;
BackendId backend;
bool needs_wal;
@@ -111,6 +109,10 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
backend = MyBackendId;
needs_wal = false;
break;
+ case RELPERSISTENCE_UNLOGGED:
+ backend = InvalidBackendId;
+ needs_wal = false;
+ break;
case RELPERSISTENCE_PERMANENT:
backend = InvalidBackendId;
needs_wal = true;
@@ -124,19 +126,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
smgrcreate(srel, MAIN_FORKNUM, false);
if (needs_wal)
- {
- /*
- * Make an XLOG entry reporting the file creation.
- */
- xlrec.rnode = rnode;
-
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xlrec);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
- lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
- }
+ log_smgrcreate(&srel->smgr_rnode.node, MAIN_FORKNUM);
/* Add the relation to the list of stuff to delete at abort */
pending = (PendingRelDelete *)
@@ -150,6 +140,29 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
}
/*
+ * Perform XLogInsert of a XLOG_SMGR_CREATE record to WAL.
+ */
+void
+log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
+{
+ xl_smgr_create xlrec;
+ XLogRecData rdata;
+
+ /*
+ * Make an XLOG entry reporting creation of the given fork of *rnode.
+ */
+ xlrec.rnode = *rnode;
+ xlrec.forkNum = forkNum;
+
+ rdata.data = (char *) &xlrec;
+ rdata.len = sizeof(xlrec);
+ rdata.buffer = InvalidBuffer;
+ rdata.next = NULL; /* the record payload is this single element */
+
+ XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
+}
+
+/*
* RelationDropStorage
* Schedule unlinking of physical storage at transaction commit.
*/
@@ -478,7 +491,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
SMgrRelation reln;
reln = smgropen(xlrec->rnode, InvalidBackendId);
- smgrcreate(reln, MAIN_FORKNUM, true);
+ smgrcreate(reln, xlrec->forkNum, true);
}
else if (info == XLOG_SMGR_TRUNCATE)
{
@@ -523,7 +536,7 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec)
if (info == XLOG_SMGR_CREATE)
{
xl_smgr_create *xlrec = (xl_smgr_create *) rec;
- char *path = relpathperm(xlrec->rnode, MAIN_FORKNUM);
+ char *path = relpathperm(xlrec->rnode, xlrec->forkNum);
appendStringInfo(buf, "file create: %s", path);
pfree(path);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 6729d8336f5..3f6b814f02c 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5128,12 +5128,12 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel,
RelationGetRelationName(pkrel))));
/*
- * References from permanent tables to temp tables are disallowed because
- * the contents of the temp table disappear at the end of each session.
- * References from temp tables to permanent tables are also disallowed,
- * because other backends might need to run the RI triggers on the perm
- * table, but they can't reliably see tuples in the local buffers of other
- * backends.
+ * References from permanent or unlogged tables to temp tables, and from
+ * permanent tables to unlogged tables, are disallowed because the
+ * referenced data can vanish out from under us. References from temp
+ * tables to any other table type are also disallowed, because other
+ * backends might need to run the RI triggers on the perm table, but they
+ * can't reliably see tuples in the local buffers of other backends.
*/
switch (rel->rd_rel->relpersistence)
{
@@ -5143,6 +5143,13 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("constraints on permanent tables may reference only permanent tables")));
break;
+ case RELPERSISTENCE_UNLOGGED:
+ if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT
+ && pkrel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("constraints on unlogged tables may reference only permanent or unlogged tables")));
+ break;
case RELPERSISTENCE_TEMP:
if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
ereport(ERROR,
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 37840baa0f6..26a5e84d44a 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -538,8 +538,8 @@ static RangeVar *makeRangeVarFromAnyName(List *names, int position, core_yyscan_
TO TRAILING TRANSACTION TREAT TRIGGER TRIM TRUE_P
TRUNCATE TRUSTED TYPE_P
- UNBOUNDED UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNTIL
- UPDATE USER USING
+ UNBOUNDED UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNLOGGED
+ UNTIL UPDATE USER USING
VACUUM VALID VALIDATOR VALUE_P VALUES VARCHAR VARIADIC VARYING
VERBOSE VERSION_P VIEW VOLATILE
@@ -2365,6 +2365,7 @@ OptTemp: TEMPORARY { $$ = RELPERSISTENCE_TEMP; }
| LOCAL TEMP { $$ = RELPERSISTENCE_TEMP; }
| GLOBAL TEMPORARY { $$ = RELPERSISTENCE_TEMP; }
| GLOBAL TEMP { $$ = RELPERSISTENCE_TEMP; }
+ | UNLOGGED { $$ = RELPERSISTENCE_UNLOGGED; }
| /*EMPTY*/ { $$ = RELPERSISTENCE_PERMANENT; }
;
@@ -7927,6 +7928,11 @@ OptTempTableName:
$$ = $4;
$$->relpersistence = RELPERSISTENCE_TEMP;
}
+ | UNLOGGED opt_table qualified_name
+ {
+ $$ = $3;
+ $$->relpersistence = RELPERSISTENCE_UNLOGGED;
+ }
| TABLE qualified_name
{
$$ = $2;
@@ -11395,6 +11401,7 @@ unreserved_keyword:
| UNENCRYPTED
| UNKNOWN
| UNLISTEN
+ | UNLOGGED
| UNTIL
| UPDATE
| VACUUM
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 860e736ff05..34e54536692 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -82,7 +82,7 @@ static bool IsForInput;
static volatile BufferDesc *PinCountWaitBuf = NULL;
-static Buffer ReadBuffer_common(SMgrRelation reln,
+static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy,
bool *hit);
@@ -97,7 +97,9 @@ static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
int set_flag_bits);
static void shared_buffer_write_error_callback(void *arg);
static void local_buffer_write_error_callback(void *arg);
-static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
+static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
+ char relpersistence,
+ ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr);
@@ -241,8 +243,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
* miss.
*/
pgstat_count_buffer_read(reln);
- buf = ReadBuffer_common(reln->rd_smgr, forkNum, blockNum,
- mode, strategy, &hit);
+ buf = ReadBuffer_common(reln->rd_smgr, reln->rd_rel->relpersistence,
+ forkNum, blockNum, mode, strategy, &hit);
if (hit)
pgstat_count_buffer_hit(reln);
return buf;
@@ -253,10 +255,10 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
* ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require
* a relcache entry for the relation.
*
- * NB: At present, this function may not be used on temporary relations, which
+ * NB: At present, this function may only be used on permanent relations, which
* is OK, because we only use it during XLOG replay. If in the future we
- * want to use it on temporary relations, we could pass the backend ID as an
- * additional parameter.
+ * want to use it on temporary or unlogged relations, we could pass additional
+ * parameters.
*/
Buffer
ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
@@ -267,7 +269,8 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
- return ReadBuffer_common(smgr, forkNum, blockNum, mode, strategy, &hit);
+ return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
+ mode, strategy, &hit);
}
@@ -277,7 +280,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
* *hit is set to true if the request was satisfied from shared buffer cache.
*/
static Buffer
-ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum,
+ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
BufferAccessStrategy strategy, bool *hit)
{
@@ -319,7 +322,8 @@ ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum,
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is
* not currently in memory.
*/
- bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, &found);
+ bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
+ strategy, &found);
if (found)
pgBufferUsage.shared_blks_hit++;
else
@@ -500,7 +504,7 @@ ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum,
* No locks are held either at entry or exit.
*/
static volatile BufferDesc *
-BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
+BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr)
@@ -797,8 +801,11 @@ BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
* 1 so that the buffer can survive one clock-sweep pass.)
*/
buf->tag = newTag;
- buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR);
- buf->flags |= BM_TAG_VALID;
+ buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
+ if (relpersistence == RELPERSISTENCE_PERMANENT)
+ buf->flags |= BM_TAG_VALID | BM_PERMANENT;
+ else
+ buf->flags |= BM_TAG_VALID;
buf->usage_count = 1;
UnlockBufHdr(buf);
@@ -1155,8 +1162,10 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
* BufferSync -- Write out all dirty buffers in the pool.
*
* This is called at checkpoint time to write out all dirty shared buffers.
- * The checkpoint request flags should be passed in; currently the only one
- * examined is CHECKPOINT_IMMEDIATE, which disables delays between writes.
+ * The checkpoint request flags should be passed in. If CHECKPOINT_IMMEDIATE
+ * is set, we disable delays between writes; if CHECKPOINT_IS_SHUTDOWN is
+ * set, we write even unlogged buffers, which are otherwise skipped. The
+ * remaining flags currently have no effect here.
*/
static void
BufferSync(int flags)
@@ -1165,11 +1174,19 @@ BufferSync(int flags)
int num_to_scan;
int num_to_write;
int num_written;
+ int mask = BM_DIRTY;
/* Make sure we can handle the pin inside SyncOneBuffer */
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
/*
+ * Unless this is a shutdown checkpoint, require BM_PERMANENT in the buffer
+ * flag mask too (permanent, dirty buffers only); at shutdown write all dirty.
+ */
+ if (!(flags & CHECKPOINT_IS_SHUTDOWN))
+ mask |= BM_PERMANENT;
+
+ /*
* Loop over all buffers, and mark the ones that need to be written with
* BM_CHECKPOINT_NEEDED. Count them as we go (num_to_write), so that we
* can estimate how much work needs to be done.
@@ -1196,7 +1213,7 @@ BufferSync(int flags)
*/
LockBufHdr(bufHdr);
- if (bufHdr->flags & BM_DIRTY)
+ if ((bufHdr->flags & mask) == mask)
{
bufHdr->flags |= BM_CHECKPOINT_NEEDED;
num_to_write++;
@@ -1897,12 +1914,12 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
* Determines the current number of pages in the relation.
*/
BlockNumber
-RelationGetNumberOfBlocks(Relation relation)
+RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
{
/* Open it at the smgr level if not already done */
RelationOpenSmgr(relation);
- return smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
+ return smgrnblocks(relation->rd_smgr, forkNum);
}
/* ---------------------------------------------------------------------
diff --git a/src/backend/storage/file/Makefile b/src/backend/storage/file/Makefile
index 3b93aa1b45d..d2198f2b93e 100644
--- a/src/backend/storage/file/Makefile
+++ b/src/backend/storage/file/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/storage/file
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = fd.o buffile.o copydir.o
+OBJS = fd.o buffile.o copydir.o reinit.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c
index f7dc509b500..587fb9260c0 100644
--- a/src/backend/storage/file/copydir.c
+++ b/src/backend/storage/file/copydir.c
@@ -38,7 +38,6 @@
#endif
-static void copy_file(char *fromfile, char *tofile);
static void fsync_fname(char *fname, bool isdir);
@@ -142,7 +141,7 @@ copydir(char *fromdir, char *todir, bool recurse)
/*
* copy one file
*/
-static void
+void
copy_file(char *fromfile, char *tofile)
{
char *buffer;
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 4f7dc39d638..a1dc18be44b 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2055,7 +2055,7 @@ looks_like_temp_rel_name(const char *name)
/* We might have _forkname or .segment or both. */
if (name[pos] == '_')
{
- int forkchar = forkname_chars(&name[pos+1]);
+ int forkchar = forkname_chars(&name[pos+1], NULL);
if (forkchar <= 0)
return false;
pos += forkchar + 1;
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
new file mode 100644
index 00000000000..b75178b8045
--- /dev/null
+++ b/src/backend/storage/file/reinit.c
@@ -0,0 +1,396 @@
+/*-------------------------------------------------------------------------
+ *
+ * reinit.c
+ * Reinitialization of unlogged relations
+ *
+ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/storage/file/reinit.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "catalog/catalog.h"
+#include "storage/copydir.h"
+#include "storage/fd.h"
+#include "storage/reinit.h"
+#include "utils/hsearch.h"
+#include "utils/memutils.h"
+
+static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
+ int op);
+static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
+ int op);
+static bool parse_filename_for_nontemp_relation(const char *name,
+ int *oidchars, ForkNumber *fork);
+
+typedef struct {
+ char oid[OIDCHARS+1];
+} unlogged_relation_entry;
+
+/*
+ * Reset unlogged relations from before the last restart.
+ *
+ * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
+ * relation with an "init" fork, except for the "init" fork itself.
+ *
+ * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
+ * fork.
+ */
+void
+ResetUnloggedRelations(int op)
+{
+ char temp_path[MAXPGPATH];
+ DIR *spc_dir;
+ struct dirent *spc_de;
+ MemoryContext tmpctx, oldctx;
+
+ /* Log it. */
+ ereport(DEBUG1,
+ (errmsg("resetting unlogged relations: cleanup %d init %d",
+ (op & UNLOGGED_RELATION_CLEANUP) != 0,
+ (op & UNLOGGED_RELATION_INIT) != 0)));
+
+ /*
+ * Just to be sure we don't leak any memory, let's create a temporary
+ * memory context for this operation.
+ */
+ tmpctx = AllocSetContextCreate(CurrentMemoryContext,
+ "ResetUnloggedRelations",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ oldctx = MemoryContextSwitchTo(tmpctx);
+
+ /*
+ * First process unlogged files in pg_default ($PGDATA/base)
+ */
+ ResetUnloggedRelationsInTablespaceDir("base", op);
+
+ /*
+ * Cycle through directories for all non-default tablespaces.
+ */
+ spc_dir = AllocateDir("pg_tblspc");
+
+ while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
+ {
+ if (strcmp(spc_de->d_name, ".") == 0 ||
+ strcmp(spc_de->d_name, "..") == 0)
+ continue;
+
+ snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
+ spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
+ ResetUnloggedRelationsInTablespaceDir(temp_path, op);
+ }
+
+ FreeDir(spc_dir);
+
+ /*
+ * Restore memory context.
+ */
+ MemoryContextSwitchTo(oldctx);
+ MemoryContextDelete(tmpctx);
+}
+
+/* Process one tablespace directory for ResetUnloggedRelations */
+static void
+ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
+{
+ DIR *ts_dir;
+ struct dirent *de;
+ char dbspace_path[MAXPGPATH];
+
+ ts_dir = AllocateDir(tsdirname);
+ if (ts_dir == NULL)
+ {
+ /* anything except ENOENT is fishy */
+ if (errno != ENOENT)
+ elog(LOG,
+ "could not open tablespace directory \"%s\": %m",
+ tsdirname);
+ return;
+ }
+
+ while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
+ {
+ int i = 0;
+
+ /*
+ * We're only interested in the per-database directories, which have
+ * numeric names. Note that this code will also (properly) ignore "."
+ * and "..".
+ */
+ while (isdigit((unsigned char) de->d_name[i]))
+ ++i;
+ if (de->d_name[i] != '\0' || i == 0)
+ continue;
+
+ snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
+ tsdirname, de->d_name);
+ ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
+ }
+
+ FreeDir(ts_dir);
+}
+
+/* Process one per-dbspace directory for ResetUnloggedRelations */
+static void
+ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
+{
+ DIR *dbspace_dir;
+ struct dirent *de;
+ char rm_path[MAXPGPATH];
+
+ /* Caller must specify at least one operation. */
+ Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0);
+
+ /*
+ * Cleanup is a two-pass operation. First, we go through and identify all
+ * the files with init forks. Then, we go through again and nuke
+ * everything with the same OID except the init fork.
+ */
+ if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
+ {
+ HTAB *hash = NULL;
+ HASHCTL ctl;
+
+ /* Open the directory. */
+ dbspace_dir = AllocateDir(dbspacedirname);
+ if (dbspace_dir == NULL)
+ {
+ elog(LOG,
+ "could not open dbspace directory \"%s\": %m",
+ dbspacedirname);
+ return;
+ }
+
+ /*
+ * It's possible that someone could create a ton of unlogged relations
+ * in the same database & tablespace, so we'd better use a hash table
+ * rather than an array or linked list to keep track of which files
+ * need to be reset. Otherwise, this cleanup operation would be
+ * O(n^2).
+ */
+ ctl.keysize = sizeof(unlogged_relation_entry);
+ ctl.entrysize = sizeof(unlogged_relation_entry);
+ hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
+
+ /* Scan the directory. */
+ while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
+ {
+ ForkNumber forkNum;
+ int oidchars;
+ unlogged_relation_entry ent;
+
+ /* Skip anything that doesn't look like a relation data file. */
+ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
+ &forkNum))
+ continue;
+
+ /* Also skip it unless this is the init fork. */
+ if (forkNum != INIT_FORKNUM)
+ continue;
+
+ /*
+ * Put the OID portion of the name into the hash table, if it isn't
+ * already there.
+ */
+ memset(ent.oid, 0, sizeof(ent.oid));
+ memcpy(ent.oid, de->d_name, oidchars);
+ hash_search(hash, &ent, HASH_ENTER, NULL);
+ }
+
+ /* Done with the first pass. */
+ FreeDir(dbspace_dir);
+
+ /*
+ * If we didn't find any init forks, there's no point in continuing;
+ * we can bail out now.
+ */
+ if (hash_get_num_entries(hash) == 0)
+ {
+ hash_destroy(hash);
+ return;
+ }
+
+ /*
+ * Now, make a second pass and remove anything that matches. First,
+ * reopen the directory.
+ */
+ dbspace_dir = AllocateDir(dbspacedirname);
+ if (dbspace_dir == NULL)
+ {
+ elog(LOG,
+ "could not open dbspace directory \"%s\": %m",
+ dbspacedirname);
+ hash_destroy(hash);
+ return;
+ }
+
+ /* Scan the directory. */
+ while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
+ {
+ ForkNumber forkNum;
+ int oidchars;
+ bool found;
+ unlogged_relation_entry ent;
+
+ /* Skip anything that doesn't look like a relation data file. */
+ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
+ &forkNum))
+ continue;
+
+ /* We never remove the init fork. */
+ if (forkNum == INIT_FORKNUM)
+ continue;
+
+ /*
+ * See whether the OID portion of the name shows up in the hash
+ * table.
+ */
+ memset(ent.oid, 0, sizeof(ent.oid));
+ memcpy(ent.oid, de->d_name, oidchars);
+ hash_search(hash, &ent, HASH_FIND, &found);
+
+ /* If so, nuke it! */
+ if (found)
+ {
+ snprintf(rm_path, sizeof(rm_path), "%s/%s",
+ dbspacedirname, de->d_name);
+ /*
+ * It's tempting to actually throw an error here, but since
+ * this code gets run during database startup, that could
+ * result in the database failing to start. (XXX Should we do
+ * it anyway?)
+ */
+ if (unlink(rm_path))
+ elog(LOG, "could not unlink file \"%s\": %m", rm_path);
+ else
+ elog(DEBUG2, "unlinked file \"%s\"", rm_path);
+ }
+ }
+
+ /* Cleanup is complete. */
+ FreeDir(dbspace_dir);
+ hash_destroy(hash);
+ }
+
+ /*
+ * Initialization happens after cleanup is complete: we copy each init
+ * fork file to the corresponding main fork file. Note that if we are
+ * asked to do both cleanup and init, we may never get here: if the cleanup
+ * code determines that there are no init forks in this dbspace, it will
+ * return before we get to this point.
+ */
+ if ((op & UNLOGGED_RELATION_INIT) != 0)
+ {
+ /* Open the directory. */
+ dbspace_dir = AllocateDir(dbspacedirname);
+ if (dbspace_dir == NULL)
+ {
+ /* we just saw this directory, so it really ought to be there */
+ elog(LOG,
+ "could not open dbspace directory \"%s\": %m",
+ dbspacedirname);
+ return;
+ }
+
+ /* Scan the directory. */
+ while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
+ {
+ ForkNumber forkNum;
+ int oidchars;
+ char oidbuf[OIDCHARS+1];
+ char srcpath[MAXPGPATH];
+ char dstpath[MAXPGPATH];
+
+ /* Skip anything that doesn't look like a relation data file. */
+ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
+ &forkNum))
+ continue;
+
+ /* Also skip it unless this is the init fork. */
+ if (forkNum != INIT_FORKNUM)
+ continue;
+
+ /* Construct source pathname. */
+ snprintf(srcpath, sizeof(srcpath), "%s/%s",
+ dbspacedirname, de->d_name);
+
+ /* Construct destination pathname. */
+ memcpy(oidbuf, de->d_name, oidchars);
+ oidbuf[oidchars] = '\0';
+ snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
+ dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
+ strlen(forkNames[INIT_FORKNUM]));
+
+ /* OK, we're ready to perform the actual copy. */
+ elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
+ copy_file(srcpath, dstpath);
+ }
+
+ /* Initialization is complete. */
+ FreeDir(dbspace_dir);
+ }
+}
+
+/*
+ * Basic parsing of putative relation filenames.
+ *
+ * This function returns true if the file appears to be in the correct format
+ * for a non-temporary relation and false otherwise.
+ *
+ * NB: If this function returns true, the caller is entitled to assume that
+ * *oidchars has been set to a value no more than OIDCHARS, and thus
+ * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID
+ * portion of the filename. This is critical to protect against a possible
+ * buffer overrun.
+ */
+static bool
+parse_filename_for_nontemp_relation(const char *name, int *oidchars,
+ ForkNumber *fork)
+{
+ int pos;
+
+ /* Look for a non-empty string of digits (that isn't too long). */
+ for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
+ ;
+ if (pos == 0 || pos > OIDCHARS)
+ return false;
+ *oidchars = pos;
+
+ /* Check for a fork name. */
+ if (name[pos] != '_')
+ *fork = MAIN_FORKNUM;
+ else
+ {
+ int forkchar;
+
+ forkchar = forkname_chars(&name[pos+1], fork);
+ if (forkchar <= 0)
+ return false;
+ pos += forkchar + 1;
+ }
+
+ /* Check for a segment number. */
+ if (name[pos] == '.')
+ {
+ int segchar;
+ for (segchar = 1; isdigit((unsigned char) name[pos+segchar]); ++segchar)
+ ;
+ if (segchar <= 1)
+ return false;
+ pos += segchar;
+ }
+
+ /* Now we should be at the end. */
+ if (name[pos] != '\0')
+ return false;
+ return true;
+}
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index e352cdafb3b..f33c29e4b21 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -615,6 +615,7 @@ pg_relation_filepath(PG_FUNCTION_ARGS)
/* Determine owning backend. */
switch (relform->relpersistence)
{
+ case RELPERSISTENCE_UNLOGGED:
case RELPERSISTENCE_PERMANENT:
backend = InvalidBackendId;
break;
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 1509686079b..fa9e9ca3a4e 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -851,6 +851,7 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
switch (relation->rd_rel->relpersistence)
{
+ case RELPERSISTENCE_UNLOGGED:
case RELPERSISTENCE_PERMANENT:
relation->rd_backend = InvalidBackendId;
break;
@@ -2490,6 +2491,7 @@ RelationBuildLocalRelation(const char *relname,
rel->rd_rel->relpersistence = relpersistence;
switch (relpersistence)
{
+ case RELPERSISTENCE_UNLOGGED:
case RELPERSISTENCE_PERMANENT:
rel->rd_backend = InvalidBackendId;
break;