aboutsummaryrefslogtreecommitdiff
path: root/src/backend/catalog/storage.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/catalog/storage.c')
-rw-r--r-- src/backend/catalog/storage.c 44
1 files changed, 32 insertions, 12 deletions
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index bb8c4d15612..5b22cf10990 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -291,6 +291,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
bool vm;
bool need_fsm_vacuum = false;
ForkNumber forks[MAX_FORKNUM];
+ BlockNumber old_blocks[MAX_FORKNUM];
BlockNumber blocks[MAX_FORKNUM];
int nforks = 0;
SMgrRelation reln;
@@ -306,6 +307,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
/* Prepare for truncation of MAIN fork of the relation */
forks[nforks] = MAIN_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
blocks[nforks] = nblocks;
nforks++;
@@ -317,6 +319,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = FSM_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
nforks++;
need_fsm_vacuum = true;
}
@@ -330,6 +333,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = VISIBILITYMAP_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
nforks++;
}
}
@@ -366,14 +370,20 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
MyProc->delayChkptFlags |= DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE;
/*
- * We WAL-log the truncation before actually truncating, which means
- * trouble if the truncation fails. If we then crash, the WAL replay
- * likely isn't going to succeed in the truncation either, and cause a
- * PANIC. It's tempting to put a critical section here, but that cure
- * would be worse than the disease. It would turn a usually harmless
- * failure to truncate, that might spell trouble at WAL replay, into a
- * certain PANIC.
+ * We WAL-log the truncation first and then truncate in a critical
+ * section. Truncation drops buffers, even if dirty, and then truncates
+ * disk files. All of that work needs to complete before the lock is
+ * released, or else old versions of pages on disk that are missing recent
+ * changes would become accessible again. We'll try the whole operation
+ * again in crash recovery if we panic, but even then we can't give up
+ * because we don't want standbys' relation sizes to diverge and break
+ * replay or visibility invariants downstream. The critical section also
+ * suppresses interrupts.
+ *
+ * (See also pg_visibilitymap.c if changing this code.)
*/
+ START_CRIT_SECTION();
+
if (RelationNeedsWAL(rel))
{
/*
@@ -397,10 +407,10 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* hit the disk before the WAL record, and the truncation of the FSM
* or visibility map. If we crashed during that window, we'd be left
* with a truncated heap, but the FSM or visibility map would still
- * contain entries for the non-existent heap pages.
+ * contain entries for the non-existent heap pages, and standbys would
+ * also never replay the truncation.
*/
- if (fsm || vm)
- XLogFlush(lsn);
+ XLogFlush(lsn);
}
/*
@@ -408,7 +418,9 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* longer exist after truncation is complete, and then truncate the
* corresponding files on disk.
*/
- smgrtruncate(RelationGetSmgr(rel), forks, nforks, blocks);
+ smgrtruncate(RelationGetSmgr(rel), forks, nforks, old_blocks, blocks);
+
+ END_CRIT_SECTION();
/* We've done all the critical work, so checkpoints are OK now. */
MyProc->delayChkptFlags &= ~(DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE);
@@ -973,6 +985,7 @@ smgr_redo(XLogReaderState *record)
Relation rel;
ForkNumber forks[MAX_FORKNUM];
BlockNumber blocks[MAX_FORKNUM];
+ BlockNumber old_blocks[MAX_FORKNUM];
int nforks = 0;
bool need_fsm_vacuum = false;
@@ -1007,6 +1020,7 @@ smgr_redo(XLogReaderState *record)
if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
{
forks[nforks] = MAIN_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
blocks[nforks] = xlrec->blkno;
nforks++;
@@ -1024,6 +1038,7 @@ smgr_redo(XLogReaderState *record)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = FSM_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
nforks++;
need_fsm_vacuum = true;
}
@@ -1035,13 +1050,18 @@ smgr_redo(XLogReaderState *record)
if (BlockNumberIsValid(blocks[nforks]))
{
forks[nforks] = VISIBILITYMAP_FORKNUM;
+ old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
nforks++;
}
}
/* Do the real work to truncate relation forks */
if (nforks > 0)
- smgrtruncate(reln, forks, nforks, blocks);
+ {
+ START_CRIT_SECTION();
+ smgrtruncate(reln, forks, nforks, old_blocks, blocks);
+ END_CRIT_SECTION();
+ }
/*
* Update upper-level FSM pages to account for the truncation. This is