diff options
Diffstat (limited to 'src/backend/catalog/storage.c')
-rw-r--r-- | src/backend/catalog/storage.c | 44 |
1 files changed, 32 insertions, 12 deletions
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index bb8c4d15612..5b22cf10990 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -291,6 +291,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) bool vm; bool need_fsm_vacuum = false; ForkNumber forks[MAX_FORKNUM]; + BlockNumber old_blocks[MAX_FORKNUM]; BlockNumber blocks[MAX_FORKNUM]; int nforks = 0; SMgrRelation reln; @@ -306,6 +307,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) /* Prepare for truncation of MAIN fork of the relation */ forks[nforks] = MAIN_FORKNUM; + old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM); blocks[nforks] = nblocks; nforks++; @@ -317,6 +319,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) if (BlockNumberIsValid(blocks[nforks])) { forks[nforks] = FSM_FORKNUM; + old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM); nforks++; need_fsm_vacuum = true; } @@ -330,6 +333,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) if (BlockNumberIsValid(blocks[nforks])) { forks[nforks] = VISIBILITYMAP_FORKNUM; + old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM); nforks++; } } @@ -366,14 +370,20 @@ RelationTruncate(Relation rel, BlockNumber nblocks) MyProc->delayChkptFlags |= DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE; /* - * We WAL-log the truncation before actually truncating, which means - * trouble if the truncation fails. If we then crash, the WAL replay - * likely isn't going to succeed in the truncation either, and cause a - * PANIC. It's tempting to put a critical section here, but that cure - * would be worse than the disease. It would turn a usually harmless - * failure to truncate, that might spell trouble at WAL replay, into a - * certain PANIC. + * We WAL-log the truncation first and then truncate in a critical + * section. Truncation drops buffers, even if dirty, and then truncates + * disk files. All of that work needs to complete before the lock is + * released, or else old versions of pages on disk that are missing recent + * changes would become accessible again. We'll try the whole operation + * again in crash recovery if we panic, but even then we can't give up + * because we don't want standbys' relation sizes to diverge and break + * replay or visibility invariants downstream. The critical section also + * suppresses interrupts. + * + * (See also pg_visibilitymap.c if changing this code.) */ + START_CRIT_SECTION(); + if (RelationNeedsWAL(rel)) { /* @@ -397,10 +407,10 @@ RelationTruncate(Relation rel, BlockNumber nblocks) * hit the disk before the WAL record, and the truncation of the FSM * or visibility map. If we crashed during that window, we'd be left * with a truncated heap, but the FSM or visibility map would still - * contain entries for the non-existent heap pages. + * contain entries for the non-existent heap pages, and standbys would + * also never replay the truncation. */ - if (fsm || vm) - XLogFlush(lsn); + XLogFlush(lsn); } /* @@ -408,7 +418,9 @@ RelationTruncate(Relation rel, BlockNumber nblocks) * longer exist after truncation is complete, and then truncate the * corresponding files on disk. */ - smgrtruncate(RelationGetSmgr(rel), forks, nforks, blocks); + smgrtruncate(RelationGetSmgr(rel), forks, nforks, old_blocks, blocks); + + END_CRIT_SECTION(); /* We've done all the critical work, so checkpoints are OK now. */ MyProc->delayChkptFlags &= ~(DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE); @@ -973,6 +985,7 @@ smgr_redo(XLogReaderState *record) Relation rel; ForkNumber forks[MAX_FORKNUM]; BlockNumber blocks[MAX_FORKNUM]; + BlockNumber old_blocks[MAX_FORKNUM]; int nforks = 0; bool need_fsm_vacuum = false; @@ -1007,6 +1020,7 @@ smgr_redo(XLogReaderState *record) if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0) { forks[nforks] = MAIN_FORKNUM; + old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM); blocks[nforks] = xlrec->blkno; nforks++; @@ -1024,6 +1038,7 @@ smgr_redo(XLogReaderState *record) if (BlockNumberIsValid(blocks[nforks])) { forks[nforks] = FSM_FORKNUM; + old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM); nforks++; need_fsm_vacuum = true; } @@ -1035,13 +1050,18 @@ smgr_redo(XLogReaderState *record) if (BlockNumberIsValid(blocks[nforks])) { forks[nforks] = VISIBILITYMAP_FORKNUM; + old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM); nforks++; } } /* Do the real work to truncate relation forks */ if (nforks > 0) - smgrtruncate(reln, forks, nforks, blocks); + { + START_CRIT_SECTION(); + smgrtruncate(reln, forks, nforks, old_blocks, blocks); + END_CRIT_SECTION(); + } /* * Update upper-level FSM pages to account for the truncation. This is |