diff options
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 172 |
1 file changed, 164 insertions, 8 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 11005edc735..d73a40c1bc6 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -38,6 +38,7 @@
 #include "access/xlogutils.h"
 #include "catalog/catalog.h"
 #include "catalog/storage.h"
+#include "catalog/storage_xlog.h"
 #include "executor/instrument.h"
 #include "lib/binaryheap.h"
 #include "miscadmin.h"
@@ -486,6 +487,9 @@ static void FindAndDropRelFileNodeBuffers(RelFileNode rnode,
 										  ForkNumber forkNum,
 										  BlockNumber nForkBlock,
 										  BlockNumber firstDelBlock);
+static void RelationCopyStorageUsingBuffer(Relation src, Relation dst,
+										   ForkNumber forkNum,
+										   bool permanent);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int rnode_comparator(const void *p1, const void *p2);
@@ -772,23 +776,23 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
  * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require
  *		a relcache entry for the relation.
  *
- * NB: At present, this function may only be used on permanent relations, which
- * is OK, because we only use it during XLOG replay. If in the future we
- * want to use it on temporary or unlogged relations, we could pass additional
- * parameters.
+ * Pass permanent = true for a RELPERSISTENCE_PERMANENT relation, and
+ * permanent = false for a RELPERSISTENCE_UNLOGGED relation. This function
+ * cannot be used for temporary relations (and making that work might be
+ * difficult, unless we only want to read temporary relations for our own
+ * BackendId).
  */
 Buffer
 ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
 						  BlockNumber blockNum, ReadBufferMode mode,
-						  BufferAccessStrategy strategy)
+						  BufferAccessStrategy strategy, bool permanent)
 {
 	bool		hit;
 
 	SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
 
-	Assert(InRecovery);
-
-	return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
+	return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT :
+							 RELPERSISTENCE_UNLOGGED, forkNum, blockNum,
 							 mode, strategy, &hit);
 }
 
@@ -3677,6 +3681,158 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 }
 
 /* ---------------------------------------------------------------------
+ *		RelationCopyStorageUsingBuffer
+ *
+ *		Copy fork's data using bufmgr. Same as RelationCopyStorage but instead
+ *		of using smgrread and smgrextend this will copy using bufmgr APIs.
+ *
+ *		Refer comments atop CreateAndCopyRelationData() for details about
+ *		'permanent' parameter.
+ * --------------------------------------------------------------------
+ */
+static void
+RelationCopyStorageUsingBuffer(Relation src, Relation dst, ForkNumber forkNum,
+							   bool permanent)
+{
+	Buffer		srcBuf;
+	Buffer		dstBuf;
+	Page		srcPage;
+	Page		dstPage;
+	bool		use_wal;
+	BlockNumber nblocks;
+	BlockNumber blkno;
+	BufferAccessStrategy bstrategy_src;
+	BufferAccessStrategy bstrategy_dst;
+
+	/*
+	 * In general, we want to write WAL whenever wal_level > 'minimal', but
+	 * we can skip it when copying any fork of an unlogged relation other
+	 * than the init fork.
+	 */
+	use_wal = XLogIsNeeded() && (permanent || forkNum == INIT_FORKNUM);
+
+	/* Get number of blocks in the source relation. */
+	nblocks = smgrnblocks(RelationGetSmgr(src), forkNum);
+
+	/* Nothing to copy; just return. */
+	if (nblocks == 0)
+		return;
+
+	/* This is a bulk operation, so use buffer access strategies. */
+	bstrategy_src = GetAccessStrategy(BAS_BULKREAD);
+	bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE);
+
+	/* Copy every source block, even new or empty ones, in block order. */
+	for (blkno = 0; blkno < nblocks; blkno++)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/* Read the source block and share-lock it while it is copied. */
+		srcBuf = ReadBufferWithoutRelcache(src->rd_node, forkNum, blkno,
+										   RBM_NORMAL, bstrategy_src,
+										   permanent);
+		LockBuffer(srcBuf, BUFFER_LOCK_SHARE);
+		srcPage = BufferGetPage(srcBuf);
+
+		/* Extend dst via P_NEW for every block so block numbers line up. */
+		dstBuf = ReadBufferWithoutRelcache(dst->rd_node, forkNum, P_NEW,
+										   RBM_NORMAL, bstrategy_dst,
+										   permanent);
+		LockBuffer(dstBuf, BUFFER_LOCK_EXCLUSIVE);
+
+		START_CRIT_SECTION();
+
+		/* Copy page data from the source to the destination. */
+		dstPage = BufferGetPage(dstBuf);
+		memcpy(dstPage, srcPage, BLCKSZ);
+		MarkBufferDirty(dstBuf);
+
+		/* WAL-log the copied page. */
+		if (use_wal)
+			log_newpage_buffer(dstBuf, true);
+
+		END_CRIT_SECTION();
+
+		UnlockReleaseBuffer(dstBuf);
+		UnlockReleaseBuffer(srcBuf);
+	}
+
+	/* The bulk access strategies are no longer needed. */
+	FreeAccessStrategy(bstrategy_src);
+	FreeAccessStrategy(bstrategy_dst);
+}
+
+/* ---------------------------------------------------------------------
+ *		CreateAndCopyRelationData
+ *
+ *		Create destination relation storage and copy all forks from the
+ *		source relation to the destination.
+ *
+ *		Pass permanent as true for permanent relations and false for
+ *		unlogged relations. Currently this API is not supported for
+ *		temporary relations.
+ * --------------------------------------------------------------------
+ */
+void
+CreateAndCopyRelationData(RelFileNode src_rnode, RelFileNode dst_rnode,
+						  bool permanent)
+{
+	Relation	src_rel;
+	Relation	dst_rel;
+	char		relpersistence;
+
+	/* Set the relpersistence. */
+	relpersistence = permanent ?
+		RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED;
+
+	/*
+	 * We can't use a real relcache entry for a relation in some other
+	 * database, but since we're only going to access the fields related
+	 * to physical storage, a fake one is good enough. If we didn't do this
+	 * and used the smgr layer directly, we would have to worry about
+	 * invalidations.
+	 */
+	src_rel = CreateFakeRelcacheEntry(src_rnode);
+	dst_rel = CreateFakeRelcacheEntry(dst_rnode);
+
+	/*
+	 * Create and copy all forks of the relation. During create database we
+	 * have a separate cleanup mechanism which deletes complete database
+	 * directory. Therefore, each individual relation doesn't need to be
+	 * registered for cleanup.
+	 */
+	RelationCreateStorage(dst_rnode, relpersistence, false);
+
+	/* copy main fork. */
+	RelationCopyStorageUsingBuffer(src_rel, dst_rel, MAIN_FORKNUM, permanent);
+
+	/* copy those extra forks that exist */
+	for (ForkNumber forkNum = MAIN_FORKNUM + 1;
+		 forkNum <= MAX_FORKNUM; forkNum++)
+	{
+		if (smgrexists(RelationGetSmgr(src_rel), forkNum))
+		{
+			smgrcreate(RelationGetSmgr(dst_rel), forkNum, false);
+
+			/*
+			 * WAL log creation if the relation is persistent, or this is the
+			 * init fork of an unlogged relation.
+			 */
+			if (permanent || forkNum == INIT_FORKNUM)
+				log_smgrcreate(&dst_rnode, forkNum);
+
+			/* Copy a fork's data, block by block. */
+			RelationCopyStorageUsingBuffer(src_rel, dst_rel, forkNum,
+										   permanent);
+		}
+	}
+
+	/* Release fake relcache entries. */
+	FreeFakeRelcacheEntry(src_rel);
+	FreeFakeRelcacheEntry(dst_rel);
+}
+
+/* ---------------------------------------------------------------------
  *		FlushDatabaseBuffers
  *
  *		This function writes all dirty pages of a database out to disk