diff options
Diffstat (limited to 'src/backend/storage')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 87 | ||||
-rw-r--r-- | src/backend/storage/buffer/localbuf.c | 18 | ||||
-rw-r--r-- | src/backend/storage/file/fd.c | 136 | ||||
-rw-r--r-- | src/backend/storage/freespace/freespace.c | 4 | ||||
-rw-r--r-- | src/backend/storage/smgr/md.c | 92 | ||||
-rw-r--r-- | src/backend/storage/smgr/smgr.c | 67 |
6 files changed, 280 insertions, 124 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 3b6938135ac..4c09df1ba78 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.256 2010/02/26 02:00:59 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.257 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -95,7 +95,8 @@ static void WaitIO(volatile BufferDesc *buf); static bool StartBufferIO(volatile BufferDesc *buf, bool forInput); static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty, int set_flag_bits); -static void buffer_write_error_callback(void *arg); +static void shared_buffer_write_error_callback(void *arg); +static void local_buffer_write_error_callback(void *arg); static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, @@ -141,7 +142,8 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) int buf_id; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node, + forkNum, blockNum); /* determine its hash code and partition lock ID */ newHash = BufTableHashCode(&newTag); @@ -251,18 +253,21 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require * a relcache entry for the relation. * - * NB: caller is assumed to know what it's doing if isTemp is true. + * NB: At present, this function may not be used on temporary relations, which + * is OK, because we only use it during XLOG replay. If in the future we + * want to use it on temporary relations, we could pass the backend ID as an + * additional parameter. */ Buffer -ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, - ForkNumber forkNum, BlockNumber blockNum, - ReadBufferMode mode, BufferAccessStrategy strategy) +ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, + BlockNumber blockNum, ReadBufferMode mode, + BufferAccessStrategy strategy) { bool hit; - SMgrRelation smgr = smgropen(rnode); + SMgrRelation smgr = smgropen(rnode, InvalidBackendId); - return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, mode, strategy, + return ReadBuffer_common(smgr, false, forkNum, blockNum, mode, strategy, &hit); } @@ -414,7 +419,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, { /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); - smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, isLocalBuf); + smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false); } else { @@ -465,10 +470,10 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, VacuumCostBalance += VacuumCostPageMiss; TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum, - smgr->smgr_rnode.spcNode, - smgr->smgr_rnode.dbNode, - smgr->smgr_rnode.relNode, - isLocalBuf, + smgr->smgr_rnode.node.spcNode, + smgr->smgr_rnode.node.dbNode, + smgr->smgr_rnode.node.relNode, + smgr->smgr_rnode.backend, isExtend, found); @@ -512,7 +517,7 @@ BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, bool valid; /* create a tag so we can lookup the buffer */ - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* determine its hash code and partition lock ID */ newHash = BufTableHashCode(&newTag); @@ -1693,21 +1698,24 @@ PrintBufferLeakWarning(Buffer buffer) volatile BufferDesc *buf; int32 loccount; char *path; + BackendId backend; Assert(BufferIsValid(buffer)); if (BufferIsLocal(buffer)) { buf = &LocalBufferDescriptors[-buffer - 1]; loccount = LocalRefCount[-buffer - 1]; + backend = MyBackendId; } else { buf = &BufferDescriptors[buffer - 1]; loccount = PrivateRefCount[buffer - 1]; + backend = InvalidBackendId; } /* theoretically we should lock the bufhdr here */ - path = relpath(buf->tag.rnode, buf->tag.forkNum); + path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum); elog(WARNING, "buffer refcount leak: [%03d] " "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)", @@ -1831,14 +1839,14 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) return; /* Setup error traceback support for ereport() */ - errcontext.callback = buffer_write_error_callback; + errcontext.callback = shared_buffer_write_error_callback; errcontext.arg = (void *) buf; errcontext.previous = error_context_stack; error_context_stack = &errcontext; /* Find smgr relation for buffer */ if (reln == NULL) - reln = smgropen(buf->tag.rnode); + reln = smgropen(buf->tag.rnode, InvalidBackendId); TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum, buf->tag.blockNum, @@ -1929,14 +1937,15 @@ RelationGetNumberOfBlocks(Relation relation) * -------------------------------------------------------------------- */ void -DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp, +DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock) { int i; - if (istemp) + if (rnode.backend != InvalidBackendId) { - DropRelFileNodeLocalBuffers(rnode, forkNum, firstDelBlock); + if (rnode.backend == MyBackendId) + DropRelFileNodeLocalBuffers(rnode.node, forkNum, firstDelBlock); return; } @@ -1945,7 +1954,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp, volatile BufferDesc *bufHdr = &BufferDescriptors[i]; LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && + if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) && bufHdr->tag.forkNum == forkNum && bufHdr->tag.blockNum >= firstDelBlock) InvalidateBuffer(bufHdr); /* releases spinlock */ @@ -2008,7 +2017,7 @@ PrintBufferDescs(void) "[%02d] (freeNext=%d, rel=%s, " "blockNum=%u, flags=0x%x, refcount=%u %d)", i, buf->freeNext, - relpath(buf->tag.rnode, buf->tag.forkNum), + relpathbackend(buf->tag.rnode, InvalidBackendId, buf->tag.forkNum), buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } @@ -2078,7 +2087,7 @@ FlushRelationBuffers(Relation rel) ErrorContextCallback errcontext; /* Setup error traceback support for ereport() */ - errcontext.callback = buffer_write_error_callback; + errcontext.callback = local_buffer_write_error_callback; errcontext.arg = (void *) bufHdr; errcontext.previous = error_context_stack; error_context_stack = &errcontext; @@ -2087,7 +2096,7 @@ FlushRelationBuffers(Relation rel) bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), - true); + false); bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); @@ -2699,8 +2708,9 @@ AbortBufferIO(void) if (sv_flags & BM_IO_ERROR) { /* Buffer is pinned, so we can read tag without spinlock */ - char *path = relpath(buf->tag.rnode, buf->tag.forkNum); + char *path; + path = relpathperm(buf->tag.rnode, buf->tag.forkNum); ereport(WARNING, (errcode(ERRCODE_IO_ERROR), errmsg("could not write block %u of %s", @@ -2714,17 +2724,36 @@ AbortBufferIO(void) } /* - * Error context callback for errors occurring during buffer writes. + * Error context callback for errors occurring during shared buffer writes. */ static void -buffer_write_error_callback(void *arg) +shared_buffer_write_error_callback(void *arg) { volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg; /* Buffer is pinned, so we can read the tag without locking the spinlock */ if (bufHdr != NULL) { - char *path = relpath(bufHdr->tag.rnode, bufHdr->tag.forkNum); + char *path = relpathperm(bufHdr->tag.rnode, bufHdr->tag.forkNum); + + errcontext("writing block %u of relation %s", + bufHdr->tag.blockNum, path); + pfree(path); + } +} + +/* + * Error context callback for errors occurring during local buffer writes. + */ +static void +local_buffer_write_error_callback(void *arg) +{ + volatile BufferDesc *bufHdr = (volatile BufferDesc *) arg; + + if (bufHdr != NULL) + { + char *path = relpathbackend(bufHdr->tag.rnode, MyBackendId, + bufHdr->tag.forkNum); errcontext("writing block %u of relation %s", bufHdr->tag.blockNum, path); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 2b783f87f45..dd067737c99 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.89 2010/01/02 16:57:51 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.90 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -68,7 +68,7 @@ LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum, BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ if (LocalBufHash == NULL) @@ -110,7 +110,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, int trycounter; bool found; - INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum); + INIT_BUFFERTAG(newTag, smgr->smgr_rnode.node, forkNum, blockNum); /* Initialize local buffers if first request in this session */ if (LocalBufHash == NULL) @@ -127,7 +127,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag)); #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n", - smgr->smgr_rnode.relNode, forkNum, blockNum, -b - 1); + smgr->smgr_rnode.node.relNode, forkNum, blockNum, -b - 1); #endif /* this part is equivalent to PinBuffer for a shared buffer */ if (LocalRefCount[b] == 0) @@ -150,7 +150,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, #ifdef LBDEBUG fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n", - smgr->smgr_rnode.relNode, forkNum, blockNum, -nextFreeLocalBuf - 1); + smgr->smgr_rnode.node.relNode, forkNum, blockNum, + -nextFreeLocalBuf - 1); #endif /* @@ -198,14 +199,14 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, SMgrRelation oreln; /* Find smgr relation for buffer */ - oreln = smgropen(bufHdr->tag.rnode); + oreln = smgropen(bufHdr->tag.rnode, MyBackendId); /* And write... */ smgrwrite(oreln, bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *) LocalBufHdrGetBlock(bufHdr), - true); + false); /* Mark not-dirty now in case we error out below */ bufHdr->flags &= ~BM_DIRTY; @@ -309,7 +310,8 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum, if (LocalRefCount[i] != 0) elog(ERROR, "block %u of %s is still referenced (local %u)", bufHdr->tag.blockNum, - relpath(bufHdr->tag.rnode, bufHdr->tag.forkNum), + relpathbackend(bufHdr->tag.rnode, MyBackendId, + bufHdr->tag.forkNum), LocalRefCount[i]); /* Remove entry from hashtable */ hresult = (LocalBufferLookupEnt *) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 91bf4af8e4d..18d6de1dec7 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.157 2010/07/06 22:55:26 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.158 2010/08/13 20:10:52 rhaas Exp $ * * NOTES: * @@ -249,6 +249,9 @@ static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError); static void AtProcExit_Files(int code, Datum arg); static void CleanupTempFiles(bool isProcExit); static void RemovePgTempFilesInDir(const char *tmpdirname); +static void RemovePgTempRelationFiles(const char *tsdirname); +static void RemovePgTempRelationFilesInDbspace(const char *dbspacedirname); +static bool looks_like_temp_rel_name(const char *name); /* @@ -1824,10 +1827,12 @@ CleanupTempFiles(bool isProcExit) /* - * Remove temporary files left over from a prior postmaster session + * Remove temporary and temporary relation files left over from a prior + * postmaster session * * This should be called during postmaster startup. It will forcibly - * remove any leftover files created by OpenTemporaryFile. + * remove any leftover files created by OpenTemporaryFile and any leftover + * temporary relation files created by mdcreate. * * NOTE: we could, but don't, call this during a post-backend-crash restart * cycle. The argument for not doing it is that someone might want to examine @@ -1847,6 +1852,7 @@ RemovePgTempFiles(void) */ snprintf(temp_path, sizeof(temp_path), "base/%s", PG_TEMP_FILES_DIR); RemovePgTempFilesInDir(temp_path); + RemovePgTempRelationFiles("base"); /* * Cycle through temp directories for all non-default tablespaces. @@ -1862,6 +1868,10 @@ RemovePgTempFiles(void) snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s/%s", spc_de->d_name, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); RemovePgTempFilesInDir(temp_path); + + snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s", + spc_de->d_name, TABLESPACE_VERSION_DIRECTORY); + RemovePgTempRelationFiles(temp_path); } FreeDir(spc_dir); @@ -1915,3 +1925,123 @@ RemovePgTempFilesInDir(const char *tmpdirname) FreeDir(temp_dir); } + +/* Process one tablespace directory, look for per-DB subdirectories */ +static void +RemovePgTempRelationFiles(const char *tsdirname) +{ + DIR *ts_dir; + struct dirent *de; + char dbspace_path[MAXPGPATH]; + + ts_dir = AllocateDir(tsdirname); + if (ts_dir == NULL) + { + /* anything except ENOENT is fishy */ + if (errno != ENOENT) + elog(LOG, + "could not open tablespace directory \"%s\": %m", + tsdirname); + return; + } + + while ((de = ReadDir(ts_dir, tsdirname)) != NULL) + { + int i = 0; + + /* + * We're only interested in the per-database directories, which have + * numeric names. Note that this code will also (properly) ignore "." + * and "..". + */ + while (isdigit((unsigned char) de->d_name[i])) + ++i; + if (de->d_name[i] != '\0' || i == 0) + continue; + + snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", + tsdirname, de->d_name); + RemovePgTempRelationFilesInDbspace(dbspace_path); + } + + FreeDir(ts_dir); +} + +/* Process one per-dbspace directory for RemovePgTempRelationFiles */ +static void +RemovePgTempRelationFilesInDbspace(const char *dbspacedirname) +{ + DIR *dbspace_dir; + struct dirent *de; + char rm_path[MAXPGPATH]; + + dbspace_dir = AllocateDir(dbspacedirname); + if (dbspace_dir == NULL) + { + /* we just saw this directory, so it really ought to be there */ + elog(LOG, + "could not open dbspace directory \"%s\": %m", + dbspacedirname); + return; + } + + while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) + { + if (!looks_like_temp_rel_name(de->d_name)) + continue; + + snprintf(rm_path, sizeof(rm_path), "%s/%s", + dbspacedirname, de->d_name); + + unlink(rm_path); /* note we ignore any error */ + } + + FreeDir(dbspace_dir); +} + +/* t<digits>_<digits>, or t<digits>_<digits>_<forkname> */ +static bool +looks_like_temp_rel_name(const char *name) +{ + int pos; + int savepos; + + /* Must start with "t". */ + if (name[0] != 't') + return false; + + /* Followed by a non-empty string of digits and then an underscore. */ + for (pos = 1; isdigit((unsigned char) name[pos]); ++pos) + ; + if (pos == 1 || name[pos] != '_') + return false; + + /* Followed by another nonempty string of digits. */ + for (savepos = ++pos; isdigit((unsigned char) name[pos]); ++pos) + ; + if (savepos == pos) + return false; + + /* We might have _forkname or .segment or both. */ + if (name[pos] == '_') + { + int forkchar = forkname_chars(&name[pos+1]); + if (forkchar <= 0) + return false; + pos += forkchar + 1; + } + if (name[pos] == '.') + { + int segchar; + for (segchar = 1; isdigit((unsigned char) name[pos+segchar]); ++segchar) + ; + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now we should be at the end. */ + if (name[pos] != '\0') + return false; + return true; +} diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index a872f1e78fb..040dd3344cd 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.77 2010/02/26 02:00:59 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.78 2010/08/13 20:10:52 rhaas Exp $ * * * NOTES: @@ -303,7 +303,7 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks) } /* Truncate the unused FSM pages, and send smgr inval message */ - smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks, rel->rd_istemp); + smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks); /* * We might as well update the local smgr_fsm_nblocks setting. diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index eb5c73d6f8d..f1ff2fe15e2 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.151 2010/02/26 02:01:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.152 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -119,7 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ */ typedef struct { - RelFileNode rnode; /* the targeted relation */ + RelFileNodeBackend rnode; /* the targeted relation */ ForkNumber forknum; BlockNumber segno; /* which segment */ } PendingOperationTag; @@ -135,7 +135,7 @@ typedef struct typedef struct { - RelFileNode rnode; /* the dead relation to delete */ + RelFileNodeBackend rnode; /* the dead relation to delete */ CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */ } PendingUnlinkEntry; @@ -158,14 +158,14 @@ static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior); static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); -static void register_unlink(RelFileNode rnode); +static void register_unlink(RelFileNodeBackend rnode); static MdfdVec *_fdvec_alloc(void); static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno); static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno, BlockNumber segno, int oflags); static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, - BlockNumber blkno, bool isTemp, ExtensionBehavior behavior); + BlockNumber blkno, bool skipFsync, ExtensionBehavior behavior); static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); @@ -321,7 +321,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) * we are usually not in a transaction anymore when this is called. */ void -mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) +mdunlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) { char *path; int ret; @@ -417,7 +417,7 @@ mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo) */ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -440,7 +440,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, relpath(reln->smgr_rnode, forknum), InvalidBlockNumber))); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -478,7 +478,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); @@ -605,9 +605,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MdfdVec *v; TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); @@ -624,9 +625,10 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -666,7 +668,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, */ void mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { off_t seekpos; int nbytes; @@ -678,11 +680,12 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, #endif TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); - v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL); + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL); seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -697,9 +700,10 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ); TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, - reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, - reln->smgr_rnode.relNode, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, nbytes, BLCKSZ); @@ -720,7 +724,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, errhint("Check free disk space."))); } - if (!isTemp) + if (!skipFsync && !SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); } @@ -794,8 +798,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * mdtruncate() -- Truncate relation to specified number of blocks. */ void -mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, - bool isTemp) +mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { MdfdVec *v; BlockNumber curnblk; @@ -839,7 +842,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\": %m", FilePathName(v->mdfd_vfd)))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st @@ -864,7 +867,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, errmsg("could not truncate file \"%s\" to %u blocks: %m", FilePathName(v->mdfd_vfd), nblocks))); - if (!isTemp) + if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); v = v->mdfd_chain; ov->mdfd_chain = NULL; @@ -1052,7 +1055,8 @@ mdsync(void) * the relation will have been dirtied through this same smgr * relation, and so we can save a file open/close cycle. */ - reln = smgropen(entry->tag.rnode); + reln = smgropen(entry->tag.rnode.node, + entry->tag.rnode.backend); /* * It is possible that the relation has been dropped or @@ -1235,7 +1239,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) * a remote pending-ops table. */ static void -register_unlink(RelFileNode rnode) +register_unlink(RelFileNodeBackend rnode) { if (pendingOpsTable) { @@ -1278,7 +1282,8 @@ register_unlink(RelFileNode rnode) * structure for them.) */ void -RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) +RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, + BlockNumber segno) { Assert(pendingOpsTable); @@ -1291,7 +1296,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (RelFileNodeEquals(entry->tag.rnode, rnode) && + if (RelFileNodeBackendEquals(entry->tag.rnode, rnode) && entry->tag.forknum == forknum) { /* Okay, cancel this entry */ @@ -1312,7 +1317,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { - if (entry->tag.rnode.dbNode == rnode.dbNode) + if (entry->tag.rnode.node.dbNode == rnode.node.dbNode) { /* Okay, cancel this entry */ entry->canceled = true; @@ -1326,7 +1331,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell); next = lnext(cell); - if (entry->rnode.dbNode == rnode.dbNode) + if (entry->rnode.node.dbNode == rnode.node.dbNode) { pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev); pfree(entry); @@ -1393,7 +1398,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno) * ForgetRelationFsyncRequests -- forget any fsyncs for a rel */ void -ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) +ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum) { if (pendingOpsTable) { @@ -1428,11 +1433,12 @@ ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum) void ForgetDatabaseFsyncRequests(Oid dbid) { - RelFileNode rnode; + RelFileNodeBackend rnode; - rnode.dbNode = dbid; - rnode.spcNode = 0; - rnode.relNode = 0; + rnode.node.dbNode = dbid; + rnode.node.spcNode = 0; + rnode.node.relNode = 0; + rnode.backend = InvalidBackendId; if (pendingOpsTable) { @@ -1523,12 +1529,12 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, * specified block. * * If the segment doesn't exist, we ereport, return NULL, or create the - * segment, according to "behavior". Note: isTemp need only be correct - * in the EXTENSION_CREATE case. + * segment, according to "behavior". Note: skipFsync is only used in the + * EXTENSION_CREATE case. */ static MdfdVec * _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, - bool isTemp, ExtensionBehavior behavior) + bool skipFsync, ExtensionBehavior behavior) { MdfdVec *v = mdopen(reln, forknum, behavior); BlockNumber targetseg; @@ -1566,7 +1572,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, mdextend(reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, - zerobuf, isTemp); + zerobuf, skipFsync); pfree(zerobuf); } v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT); diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7a35b0a8333..c1d14492227 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.121 2010/02/26 02:01:01 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.122 2010/08/13 20:10:52 rhaas Exp $ * *------------------------------------------------------------------------- */ @@ -45,19 +45,19 @@ typedef struct f_smgr void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, bool isRedo); bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileNode rnode, ForkNumber forknum, + void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer); void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool isTemp); + BlockNumber blocknum, char *buffer, bool skipFsync); BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks, bool isTemp); + BlockNumber nblocks); void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); void (*smgr_pre_ckpt) (void); /* may be NULL */ void (*smgr_sync) (void); /* may be NULL */ @@ -83,8 +83,6 @@ static HTAB *SMgrRelationHash = NULL; /* local function prototypes */ static void smgrshutdown(int code, Datum arg); -static void smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, - int which, bool isTemp, bool isRedo); /* @@ -131,8 +129,9 @@ smgrshutdown(int code, Datum arg) * This does not attempt to actually open the object. */ SMgrRelation -smgropen(RelFileNode rnode) +smgropen(RelFileNode rnode, BackendId backend) { + RelFileNodeBackend brnode; SMgrRelation reln; bool found; @@ -142,7 +141,7 @@ smgropen(RelFileNode rnode) HASHCTL ctl; MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(RelFileNode); + ctl.keysize = sizeof(RelFileNodeBackend); ctl.entrysize = sizeof(SMgrRelationData); ctl.hash = tag_hash; SMgrRelationHash = hash_create("smgr relation table", 400, @@ -150,8 +149,10 @@ smgropen(RelFileNode rnode) } /* Look up or create an entry */ + brnode.node = rnode; + brnode.backend = backend; reln = (SMgrRelation) hash_search(SMgrRelationHash, - (void *) &rnode, + (void *) &brnode, HASH_ENTER, &found); /* Initialize it if not present before */ @@ -261,7 +262,7 @@ smgrcloseall(void) * such entry exists already. */ void -smgrclosenode(RelFileNode rnode) +smgrclosenode(RelFileNodeBackend rnode) { SMgrRelation reln; @@ -305,8 +306,8 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * should be here and not in commands/tablespace.c? But that would imply * importing a lot of stuff that smgr.c oughtn't know, either. */ - TablespaceCreateDbspace(reln->smgr_rnode.spcNode, - reln->smgr_rnode.dbNode, + TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, isRedo); (*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo); @@ -323,29 +324,19 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) * already. */ void -smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo) +smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo) { - RelFileNode rnode = reln->smgr_rnode; + RelFileNodeBackend rnode = reln->smgr_rnode; int which = reln->smgr_which; /* Close the fork */ (*(smgrsw[which].smgr_close)) (reln, forknum); - smgr_internal_unlink(rnode, forknum, which, isTemp, isRedo); -} - -/* - * Shared subroutine that actually does the unlink ... - */ -static void -smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, - int which, bool isTemp, bool isRedo) -{ /* * Get rid of any remaining buffers for the relation. bufmgr will just * drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(rnode, forknum, isTemp, 0); + DropRelFileNodeBuffers(rnode, forknum, 0); /* * It'd be nice to tell the stats collector to forget it immediately, too. @@ -385,10 +376,10 @@ smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum, */ void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum, - buffer, isTemp); + buffer, skipFsync); } /* @@ -426,16 +417,16 @@ smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * on disk at return, only dumped out to the kernel. However, * provisions will be made to fsync the write before the next checkpoint. * - * isTemp indicates that the relation is a temp table (ie, is managed - * by the local-buffer manager). In this case no provisions need be - * made to fsync the write before checkpointing. + * skipFsync indicates that the caller will make other provisions to + * fsync the relation, so we needn't bother. Temporary relations also + * do not require fsync. */ void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer, bool isTemp) + char *buffer, bool skipFsync) { (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum, - buffer, isTemp); + buffer, skipFsync); } /* @@ -455,14 +446,13 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) * The truncation is done immediately, so this can't be rolled back. */ void -smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, - bool isTemp) +smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { /* * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will * just drop them without bothering to write the contents. */ - DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks); + DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks); /* * Send a shared-inval message to force other backends to close any smgr @@ -479,8 +469,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, /* * Do the truncation. */ - (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks, - isTemp); + (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks); } /* @@ -499,7 +488,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks, * to use the WAL log for PITR or replication purposes: in that case * we have to make WAL entries as well.) * - * The preceding writes should specify isTemp = true to avoid + * The preceding writes should specify skipFsync = true to avoid * duplicative fsyncs. * * Note that you need to do FlushRelationBuffers() first if there is |