diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2010-02-09 21:43:30 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2010-02-09 21:43:30 +0000 |
commit | cbe9d6beb4ae1cb20c08cab29b534be4923b6768 (patch) | |
tree | a9476492cd8c7eda7718f95b0ad5a45d41e55a3a /src/backend/storage | |
parent | 79647eed86cc972e80ea165dcb0b7f6fef876169 (diff) | |
download | postgresql-cbe9d6beb4ae1cb20c08cab29b534be4923b6768.tar.gz postgresql-cbe9d6beb4ae1cb20c08cab29b534be4923b6768.zip |
Fix up rickety handling of relation-truncation interlocks.
Move rd_targblock, rd_fsm_nblocks, and rd_vm_nblocks from relcache to the smgr
relation entries, so that they will get reset to InvalidBlockNumber whenever
an smgr-level flush happens. Because we now send smgr invalidation messages
immediately (not at end of transaction) when a relation truncation occurs,
this ensures that other backends will reset their values before they next
access the relation. We no longer need the unreliable assumption that a
VACUUM that's doing a truncation will hold its AccessExclusive lock until
commit --- in fact, we can intentionally release that lock as soon as we've
completed the truncation. This patch therefore reverts (most of) Alvaro's
patch of 2009-11-10, as well as my marginal hacking on it yesterday. We can
also get rid of assorted no-longer-needed relcache flushes, which are far more
expensive than an smgr flush because they kill a lot more state.
In passing, this patch fixes smgr_redo's failure to perform visibility-map
truncation, and cleans up some rather dubious assumptions in freespace.c and
visibilitymap.c about when rd_fsm_nblocks and rd_vm_nblocks can be out of
date.
Diffstat (limited to 'src/backend/storage')
-rw-r--r-- | src/backend/storage/freespace/freespace.c | 76 | ||||
-rw-r--r-- | src/backend/storage/smgr/smgr.c | 43 |
2 files changed, 68 insertions, 51 deletions
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index fb7bd7d83f8..b584c1fe380 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.75 2010/02/09 00:28:57 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.76 2010/02/09 21:43:30 tgl Exp $ * * * NOTES: @@ -25,16 +25,16 @@ #include "access/htup.h" #include "access/xlogutils.h" -#include "storage/bufpage.h" +#include "miscadmin.h" #include "storage/bufmgr.h" +#include "storage/bufpage.h" #include "storage/freespace.h" #include "storage/fsm_internals.h" #include "storage/lmgr.h" #include "storage/lwlock.h" #include "storage/smgr.h" #include "utils/rel.h" -#include "utils/inval.h" -#include "miscadmin.h" + /* * We use just one byte to store the amount of free space on a page, so we @@ -251,9 +251,9 @@ GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk) /* * FreeSpaceMapTruncateRel - adjust for truncation of a relation. * - * The caller must hold AccessExclusiveLock on the relation, to ensure - * that other backends receive the relcache invalidation event that this - * function sends, before accessing the FSM again. + * The caller must hold AccessExclusiveLock on the relation, to ensure that + * other backends receive the smgr invalidation event that this function sends + * before they access the FSM again. * * nblocks is the new size of the heap. 
*/ @@ -302,17 +302,18 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks) return; /* nothing to do; the FSM was already smaller */ } - /* Truncate the unused FSM pages */ + /* Truncate the unused FSM pages, and send smgr inval message */ smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks, rel->rd_istemp); /* - * Need to invalidate the relcache entry, because rd_fsm_nblocks seen by - * other backends is no longer valid. + * We might as well update the local smgr_fsm_nblocks setting. + * smgrtruncate sent an smgr cache inval message, which will cause + * other backends to invalidate their copy of smgr_fsm_nblocks, and + * this one too at the next command boundary. But this ensures it + * isn't outright wrong until then. */ - if (!InRecovery) - CacheInvalidateRelcache(rel); - - rel->rd_fsm_nblocks = new_nfsmblocks; + if (rel->rd_smgr) + rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks; } /* @@ -506,17 +507,24 @@ fsm_readbuf(Relation rel, FSMAddress addr, bool extend) RelationOpenSmgr(rel); - /* If we haven't cached the size of the FSM yet, check it first */ - if (rel->rd_fsm_nblocks == InvalidBlockNumber) + /* + * If we haven't cached the size of the FSM yet, check it first. Also + * recheck if the requested block seems to be past end, since our + * cached value might be stale. (We send smgr inval messages on + * truncation, but not on extension.) + */ + if (rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber || + blkno >= rel->rd_smgr->smgr_fsm_nblocks) { if (smgrexists(rel->rd_smgr, FSM_FORKNUM)) - rel->rd_fsm_nblocks = smgrnblocks(rel->rd_smgr, FSM_FORKNUM); + rel->rd_smgr->smgr_fsm_nblocks = smgrnblocks(rel->rd_smgr, + FSM_FORKNUM); else - rel->rd_fsm_nblocks = 0; + rel->rd_smgr->smgr_fsm_nblocks = 0; } /* Handle requests beyond EOF */ - if (blkno >= rel->rd_fsm_nblocks) + if (blkno >= rel->rd_smgr->smgr_fsm_nblocks) { if (extend) fsm_extend(rel, blkno + 1); @@ -559,19 +567,23 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) * it. 
* * Note that another backend might have extended or created the relation - * before we get the lock. + * by the time we get the lock. */ LockRelationForExtension(rel, ExclusiveLock); - /* Create the FSM file first if it doesn't exist */ - if ((rel->rd_fsm_nblocks == 0 || rel->rd_fsm_nblocks == InvalidBlockNumber) - && !smgrexists(rel->rd_smgr, FSM_FORKNUM)) - { + /* Might have to re-open if a cache flush happened */ + RelationOpenSmgr(rel); + + /* + * Create the FSM file first if it doesn't exist. If smgr_fsm_nblocks + * is positive then it must exist, no need for an smgrexists call. + */ + if ((rel->rd_smgr->smgr_fsm_nblocks == 0 || + rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber) && + !smgrexists(rel->rd_smgr, FSM_FORKNUM)) smgrcreate(rel->rd_smgr, FSM_FORKNUM, false); - fsm_nblocks_now = 0; - } - else - fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM); + + fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM); while (fsm_nblocks_now < fsm_nblocks) { @@ -580,14 +592,12 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) fsm_nblocks_now++; } + /* Update local cache with the up-to-date size */ + rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now; + UnlockRelationForExtension(rel, ExclusiveLock); pfree(pg); - - /* Update the relcache with the up-to-date size */ - if (!InRecovery) - CacheInvalidateRelcache(rel); - rel->rd_fsm_nblocks = fsm_nblocks_now; } /* diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 958be2433fb..87ae9dbe4b1 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.119 2010/02/03 01:14:17 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.120 2010/02/09 21:43:30 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -161,6 +161,9 @@ smgropen(RelFileNode rnode) /* hash_search already filled in the lookup key */ 
reln->smgr_owner = NULL; + reln->smgr_targblock = InvalidBlockNumber; + reln->smgr_fsm_nblocks = InvalidBlockNumber; + reln->smgr_vm_nblocks = InvalidBlockNumber; reln->smgr_which = 0; /* we only have md.c at present */ /* mark it not open */ @@ -352,6 +355,16 @@ smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, */ /* + * Send a shared-inval message to force other backends to close any + * dangling smgr references they may have for this rel. We should do + * this before starting the actual unlinking, in case we fail partway + * through that step. Note that the sinval message will eventually come + * back to this backend, too, and thereby provide a backstop that we + * closed our own smgr rel. + */ + CacheInvalidateSmgr(rnode); + + /* * Delete the physical file(s). * * Note: smgr_unlink must treat deletion failure as a WARNING, not an @@ -359,14 +372,6 @@ smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, * xact. */ (*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo); - - /* - * Lastly, send a shared-inval message to force other backends to close - * any dangling smgr references they may have for this rel. We do this - * last because the sinval will eventually come back to this backend, too, - * and thereby provide a backstop that we closed our own smgr rel. - */ - CacheInvalidateSmgr(rnode); } /* @@ -460,20 +465,22 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks); /* - * Do the truncation. - */ - (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks, - isTemp); - - /* * Send a shared-inval message to force other backends to close any smgr * references they may have for this rel. This is useful because they - * might have open file pointers to segments that got removed. (The inval + * might have open file pointers to segments that got removed, and/or + * smgr_targblock variables pointing past the new rel end. 
(The inval * message will come back to our backend, too, causing a - * probably-unnecessary smgr flush. But we don't expect that this is - * a performance-critical path.) + * probably-unnecessary local smgr flush. But we don't expect that this + * is a performance-critical path.) As in the unlink code, we want to + * be sure the message is sent before we start changing things on-disk. */ CacheInvalidateSmgr(reln->smgr_rnode); + + /* + * Do the truncation. + */ + (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks, + isTemp); } /* |