aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/catalog/storage.c44
-rw-r--r--src/backend/storage/smgr/md.c28
-rw-r--r--src/backend/storage/smgr/smgr.c14
3 files changed, 62 insertions, 24 deletions
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index b8baa3b822d..0ac91be63dc 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -280,6 +280,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
bool vm;
bool need_fsm_vacuum = false;
ForkNumber forks[MAX_FORKNUM];
+ BlockNumber old_blocks[MAX_FORKNUM];
BlockNumber blocks[MAX_FORKNUM];
int nforks = 0;
SMgrRelation reln;
@@ -295,6 +296,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
/* Prepare for truncation of MAIN fork of the relation */
forks[nforks] = MAIN_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
blocks[nforks] = nblocks;
nforks++;
@@ -306,6 +308,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = FSM_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
nforks++;
need_fsm_vacuum = true;
}
@@ -319,6 +322,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = VISIBILITYMAP_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
nforks++;
}
}
@@ -357,14 +361,20 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
MyProc->delayChkptEnd = true; /* DELAY_CHKPT_COMPLETE */
/*
- * We WAL-log the truncation before actually truncating, which means
- * trouble if the truncation fails. If we then crash, the WAL replay
- * likely isn't going to succeed in the truncation either, and cause a
- * PANIC. It's tempting to put a critical section here, but that cure
- * would be worse than the disease. It would turn a usually harmless
- * failure to truncate, that might spell trouble at WAL replay, into a
- * certain PANIC.
+ * We WAL-log the truncation first and then truncate in a critical
+ * section. Truncation drops buffers, even if dirty, and then truncates
+ * disk files. All of that work needs to complete before the lock is
+ * released, or else old versions of pages on disk that are missing recent
+ * changes would become accessible again. We'll try the whole operation
+ * again in crash recovery if we panic, but even then we can't give up
+ * because we don't want standbys' relation sizes to diverge and break
+ * replay or visibility invariants downstream. The critical section also
+ * suppresses interrupts.
+ *
+ * (See also pg_visibilitymap.c if changing this code.)
*/
+ START_CRIT_SECTION();
+
if (RelationNeedsWAL(rel))
{
/*
@@ -388,10 +398,10 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* hit the disk before the WAL record, and the truncation of the FSM
* or visibility map. If we crashed during that window, we'd be left
* with a truncated heap, but the FSM or visibility map would still
- * contain entries for the non-existent heap pages.
+ * contain entries for the non-existent heap pages, and standbys would
+ * also never replay the truncation.
*/
- if (fsm || vm)
- XLogFlush(lsn);
+ XLogFlush(lsn);
}
/*
@@ -399,7 +409,9 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* longer exist after truncation is complete, and then truncate the
* corresponding files on disk.
*/
- smgrtruncate(RelationGetSmgr(rel), forks, nforks, blocks);
+ smgrtruncate(RelationGetSmgr(rel), forks, nforks, old_blocks, blocks);
+
+ END_CRIT_SECTION();
/* We've done all the critical work, so checkpoints are OK now. */
MyProc->delayChkpt = false;
@@ -983,6 +995,7 @@ smgr_redo(XLogReaderState *record)
Relation rel;
ForkNumber forks[MAX_FORKNUM];
BlockNumber blocks[MAX_FORKNUM];
+ BlockNumber old_blocks[MAX_FORKNUM];
int nforks = 0;
bool need_fsm_vacuum = false;
@@ -1017,6 +1030,7 @@ smgr_redo(XLogReaderState *record)
if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
{
forks[nforks] = MAIN_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
blocks[nforks] = xlrec->blkno;
nforks++;
@@ -1034,6 +1048,7 @@ smgr_redo(XLogReaderState *record)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = FSM_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
nforks++;
need_fsm_vacuum = true;
}
@@ -1045,13 +1060,18 @@ smgr_redo(XLogReaderState *record)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = VISIBILITYMAP_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
nforks++;
}
}
/* Do the real work to truncate relation forks */
if (nforks > 0)
- smgrtruncate(reln, forks, nforks, blocks);
+ {
+ START_CRIT_SECTION();
+ smgrtruncate(reln, forks, nforks, old_blocks, blocks);
+ END_CRIT_SECTION();
+ }
/*
* Update upper-level FSM pages to account for the truncation. This is
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 72f1494c7ab..b48a6d49681 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -818,19 +818,21 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
/*
* mdtruncate() -- Truncate relation to specified number of blocks.
+ *
+ * Guaranteed not to allocate memory, so it can be used in a critical section.
+ * Caller must have called smgrnblocks() to obtain curnblk while holding a
+ * sufficient lock to prevent a change in relation size, and not used any smgr
+ * functions for this relation or handled interrupts in between. This makes
+ * sure we have opened all active segments, so that truncate loop will get
+ * them all!
*/
void
-mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
+mdtruncate(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber curnblk, BlockNumber nblocks)
{
- BlockNumber curnblk;
BlockNumber priorblocks;
int curopensegs;
- /*
- * NOTE: mdnblocks makes sure we have opened all active segments, so that
- * truncation loop will get them all!
- */
- curnblk = mdnblocks(reln, forknum);
if (nblocks > curnblk)
{
/* Bogus request ... but no complaint if InRecovery */
@@ -1099,7 +1101,7 @@ _fdvec_resize(SMgrRelation reln,
reln->md_seg_fds[forknum] =
MemoryContextAlloc(MdCxt, sizeof(MdfdVec) * nseg);
}
- else
+ else if (nseg > reln->md_num_open_segs[forknum])
{
/*
* It doesn't seem worthwhile complicating the code to amortize
@@ -1111,6 +1113,16 @@ _fdvec_resize(SMgrRelation reln,
repalloc(reln->md_seg_fds[forknum],
sizeof(MdfdVec) * nseg);
}
+ else
+ {
+ /*
+ * We don't reallocate a smaller array, because we want mdtruncate()
+ * to be able to promise that it won't allocate memory, so that it is
+ * allowed in a critical section. This means that a bit of space in
+ * the array is now wasted, until the next time we add a segment and
+ * reallocate.
+ */
+ }
reln->md_num_open_segs[forknum] = nseg;
}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 1d3bb92dfe1..9e6500ff981 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -60,7 +60,7 @@ typedef struct f_smgr
BlockNumber blocknum, BlockNumber nblocks);
BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber nblocks);
+ BlockNumber old_blocks, BlockNumber nblocks);
void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
} f_smgr;
@@ -590,10 +590,15 @@ smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
*
* The caller must hold AccessExclusiveLock on the relation, to ensure that
* other backends receive the smgr invalidation event that this function sends
- * before they access any forks of the relation again.
+ * before they access any forks of the relation again. The current size of
+ * the forks should be provided in old_nblocks. This function should normally
+ * be called in a critical section, but the current size must be checked
+ * outside the critical section, and no interrupts or smgr functions relating
+ * to this relation should be called in between.
*/
void
-smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
+smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
+ BlockNumber *old_nblocks, BlockNumber *nblocks)
{
int i;
@@ -621,7 +626,8 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
/* Make the cached size is invalid if we encounter an error. */
reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
- smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
+ smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i],
+ old_nblocks[i], nblocks[i]);
/*
* We might as well update the local smgr_cached_nblocks values. The