diff options
Diffstat (limited to 'src/backend/storage/smgr')
-rw-r--r-- | src/backend/storage/smgr/md.c | 166 | ||||
-rw-r--r-- | src/backend/storage/smgr/mm.c | 35 | ||||
-rw-r--r-- | src/backend/storage/smgr/smgr.c | 119 |
3 files changed, 57 insertions, 263 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 978d85d4868..25051a9799c 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.91 2002/06/20 20:29:35 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.92 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -381,16 +381,7 @@ mdclose_fd(int fd) /* if not closed already */ if (v->mdfd_vfd >= 0) - { - /* - * We sync the file descriptor so that we don't need to reopen - * it at transaction commit to force changes to disk. (This - * is not really optional, because we are about to forget that - * the file even exists...) - */ - FileSync(v->mdfd_vfd); FileClose(v->mdfd_vfd); - } /* Now free vector */ v = v->mdfd_chain; if (ov != &Md_fdvec[fd]) @@ -403,16 +394,7 @@ mdclose_fd(int fd) if (v != (MdfdVec *) NULL) { if (v->mdfd_vfd >= 0) - { - /* - * We sync the file descriptor so that we don't need to reopen - * it at transaction commit to force changes to disk. (This - * is not really optional, because we are about to forget that - * the file even exists...) - */ - FileSync(v->mdfd_vfd); FileClose(v->mdfd_vfd); - } } #endif @@ -498,55 +480,15 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer) } /* - * mdflush() -- Synchronously write a block to disk. - * - * This is exactly like mdwrite(), but doesn't return until the file - * system buffer cache has been flushed. - */ -int -mdflush(Relation reln, BlockNumber blocknum, char *buffer) -{ - int status; - long seekpos; - MdfdVec *v; - - v = _mdfd_getseg(reln, blocknum); - -#ifndef LET_OS_MANAGE_FILESIZE - seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE))); -#ifdef DIAGNOSTIC - if (seekpos >= BLCKSZ * RELSEG_SIZE) - elog(FATAL, "seekpos too big!"); -#endif -#else - seekpos = (long) (BLCKSZ * (blocknum)); -#endif - - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; - - /* write and sync the block */ - status = SM_SUCCESS; - if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ - || FileSync(v->mdfd_vfd) < 0) - status = SM_FAIL; - - return status; -} - -/* * mdblindwrt() -- Write a block to disk blind. * - * We have to be able to do this using only the name and OID of - * the database and relation in which the block belongs. Otherwise - * this is much like mdwrite(). If dofsync is TRUE, then we fsync - * the file, making it more like mdflush(). + * We have to be able to do this using only the rnode of the relation + * in which the block belongs. Otherwise this is much like mdwrite(). */ int mdblindwrt(RelFileNode rnode, BlockNumber blkno, - char *buffer, - bool dofsync) + char *buffer) { int status; long seekpos; @@ -568,7 +510,6 @@ mdblindwrt(RelFileNode rnode, #endif errno = 0; - if (lseek(fd, seekpos, SEEK_SET) != seekpos) { elog(LOG, "mdblindwrt: lseek(%ld) failed: %m", seekpos); @@ -578,7 +519,7 @@ mdblindwrt(RelFileNode rnode, status = SM_SUCCESS; - /* write and optionally sync the block */ + /* write the block */ errno = 0; if (write(fd, buffer, BLCKSZ) != BLCKSZ) { @@ -599,54 +540,6 @@ mdblindwrt(RelFileNode rnode, } /* - * mdmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync). - * - * Returns SM_SUCCESS or SM_FAIL. - */ -int -mdmarkdirty(Relation reln, BlockNumber blkno) -{ - MdfdVec *v; - - v = _mdfd_getseg(reln, blkno); - - FileMarkDirty(v->mdfd_vfd); - - return SM_SUCCESS; -} - -/* - * mdblindmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync). - * - * We have to be able to do this using only the name and OID of - * the database and relation in which the block belongs. Otherwise - * this is much like mdmarkdirty(). However, we do the fsync immediately - * rather than building md/fd datastructures to postpone it till later. - */ -int -mdblindmarkdirty(RelFileNode rnode, - BlockNumber blkno) -{ - int status; - int fd; - - fd = _mdfd_blind_getseg(rnode, blkno); - - if (fd < 0) - return SM_FAIL; - - status = SM_SUCCESS; - - if (pg_fsync(fd) < 0) - status = SM_FAIL; - - if (close(fd) < 0) - status = SM_FAIL; - - return status; -} - -/* * mdnblocks() -- Get the number of blocks stored in a relation. * * Important side effect: all segments of the relation are opened @@ -796,61 +689,36 @@ mdtruncate(Relation reln, BlockNumber nblocks) /* * mdcommit() -- Commit a transaction. * - * All changes to magnetic disk relations must be forced to stable - * storage. This routine makes a pass over the private table of - * file descriptors. Any descriptors to which we have done writes, - * but not synced, are synced here. - * * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ int -mdcommit() +mdcommit(void) { - int i; - MdfdVec *v; - - for (i = 0; i < CurFd; i++) - { - v = &Md_fdvec[i]; - if (v->mdfd_flags & MDFD_FREE) - continue; - /* Sync the file entry */ -#ifndef LET_OS_MANAGE_FILESIZE - for (; v != (MdfdVec *) NULL; v = v->mdfd_chain) -#else - if (v != (MdfdVec *) NULL) -#endif - { - if (FileSync(v->mdfd_vfd) < 0) - return SM_FAIL; - } - } - + /* + * We don't actually have to do anything here... + */ return SM_SUCCESS; } /* * mdabort() -- Abort a transaction. * - * Changes need not be forced to disk at transaction abort. We mark - * all file descriptors as clean here. Always returns SM_SUCCESS. + * Changes need not be forced to disk at transaction abort. */ int -mdabort() +mdabort(void) { /* - * We don't actually have to do anything here. fd.c will discard - * fsync-needed bits in its AtEOXact_Files() routine. + * We don't actually have to do anything here... */ return SM_SUCCESS; } /* - * mdsync() -- Sync storage. - * + * mdsync() -- Sync previous writes to stable storage. */ int -mdsync() +mdsync(void) { sync(); if (IsUnderPostmaster) @@ -861,11 +729,9 @@ mdsync() /* * _fdvec_alloc () -- grab a free (or new) md file descriptor vector. - * */ -static -int -_fdvec_alloc() +static int +_fdvec_alloc(void) { MdfdVec *nvec; int fdvec, diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c index 89396d173c9..739e938fe28 100644 --- a/src/backend/storage/smgr/mm.c +++ b/src/backend/storage/smgr/mm.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.31 2002/06/20 20:29:36 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.32 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -81,7 +81,7 @@ static HTAB *MMCacheHT; static HTAB *MMRelCacheHT; int -mminit() +mminit(void) { char *mmcacheblk; int mmsize = 0; @@ -151,7 +151,7 @@ mminit() } int -mmshutdown() +mmshutdown(void) { return SM_SUCCESS; } @@ -443,30 +443,15 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer) } /* - * mmflush() -- Synchronously write a block to stable storage. - * - * For main-memory relations, this is exactly equivalent to mmwrite(). - */ -int -mmflush(Relation reln, BlockNumber blocknum, char *buffer) -{ - return mmwrite(reln, blocknum, buffer); -} - -/* * mmblindwrt() -- Write a block to stable storage blind. * - * We have to be able to do this using only the name and OID of - * the database and relation in which the block belongs. + * We have to be able to do this using only the rnode of the relation + * in which the block belongs. Otherwise this is much like mmwrite(). */ int -mmblindwrt(char *dbstr, - char *relstr, - Oid dbid, - Oid relid, +mmblindwrt(RelFileNode rnode, BlockNumber blkno, - char *buffer, - bool dofsync) + char *buffer) { return SM_FAIL; } @@ -512,7 +497,7 @@ mmnblocks(Relation reln) * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ int -mmcommit() +mmcommit(void) { return SM_SUCCESS; } @@ -522,7 +507,7 @@ mmcommit() */ int -mmabort() +mmabort(void) { return SM_SUCCESS; } @@ -536,7 +521,7 @@ mmabort() * manager will use. */ int -MMShmemSize() +MMShmemSize(void) { int size = 0; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index a7fb23b4427..252781d9c3f 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.57 2002/06/20 20:29:36 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.58 2002/08/06 02:36:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,12 +40,8 @@ typedef struct f_smgr char *buffer); int (*smgr_write) (Relation reln, BlockNumber blocknum, char *buffer); - int (*smgr_flush) (Relation reln, BlockNumber blocknum, - char *buffer); int (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno, - char *buffer, bool dofsync); - int (*smgr_markdirty) (Relation reln, BlockNumber blkno); - int (*smgr_blindmarkdirty) (RelFileNode, BlockNumber blkno); + char *buffer); BlockNumber (*smgr_nblocks) (Relation reln); BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks); int (*smgr_commit) (void); /* may be NULL */ @@ -62,15 +58,15 @@ static f_smgr smgrsw[] = { /* magnetic disk */ {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, - mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty, + mdread, mdwrite, mdblindwrt, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync }, #ifdef STABLE_MEMORY_STORAGE /* main memory */ {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose, - mmread, mmwrite, mmflush, mmblindwrt, mmmarkdirty, mmblindmarkdirty, - mmnblocks, NULL, mmcommit, mmabort}, + mmread, mmwrite, mmblindwrt, + mmnblocks, NULL, mmcommit, mmabort, NULL}, #endif }; @@ -110,6 +106,7 @@ typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ int16 which; /* which storage manager? */ + bool isTemp; /* is it a temporary relation? */ bool atCommit; /* T=delete at commit; F=delete at abort */ struct PendingRelDelete *next; /* linked-list link */ } PendingRelDelete; @@ -123,7 +120,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ * */ int -smgrinit() +smgrinit(void) { int i; @@ -181,6 +178,7 @@ smgrcreate(int16 which, Relation reln) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->rd_node; pending->which = which; + pending->isTemp = reln->rd_istemp; pending->atCommit = false; /* delete if abort */ pending->next = pendingDeletes; pendingDeletes = pending; @@ -208,6 +206,7 @@ smgrunlink(int16 which, Relation reln) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->rd_node; pending->which = which; + pending->isTemp = reln->rd_istemp; pending->atCommit = true; /* delete if commit */ pending->next = pendingDeletes; pendingDeletes = pending; @@ -312,8 +311,10 @@ smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer) /* * smgrwrite() -- Write the supplied buffer out. * - * This is not a synchronous write -- the interface for that is - * smgrflush(). The buffer is written out via the appropriate + * This is not a synchronous write -- the block is not necessarily + * on disk at return, only dumped out to the kernel. + * + * The buffer is written out via the appropriate * storage manager. This routine returns SM_SUCCESS or aborts * the current transaction. */ @@ -332,23 +333,6 @@ smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer) } /* - * smgrflush() -- A synchronous smgrwrite(). - */ -int -smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer) -{ - int status; - - status = (*(smgrsw[which].smgr_flush)) (reln, blocknum, buffer); - - if (status == SM_FAIL) - elog(ERROR, "cannot flush block %d of %s to stable store: %m", - blocknum, RelationGetRelationName(reln)); - - return status; -} - -/* * smgrblindwrt() -- Write a page out blind. * * In some cases, we may find a page in the buffer cache that we @@ -357,20 +341,18 @@ smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer) * that has not yet committed, which created a new relation. In * this case, the buffer manager will call smgrblindwrt() with * the name and OID of the database and the relation to which the - * buffer belongs. Every storage manager must be able to force - * this page down to stable storage in this circumstance. The - * write should be synchronous if dofsync is true. + * buffer belongs. Every storage manager must be able to write + * this page out to stable storage in this circumstance. */ int smgrblindwrt(int16 which, RelFileNode rnode, BlockNumber blkno, - char *buffer, - bool dofsync) + char *buffer) { int status; - status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer, dofsync); + status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer); if (status == SM_FAIL) elog(ERROR, "cannot write block %d of %u/%u blind: %m", @@ -380,53 +362,6 @@ smgrblindwrt(int16 which, } /* - * smgrmarkdirty() -- Mark a page dirty (needs fsync). - * - * Mark the specified page as needing to be fsync'd before commit. - * Ordinarily, the storage manager will do this implicitly during - * smgrwrite(). However, the buffer manager may discover that some - * other backend has written a buffer that we dirtied in the current - * transaction. In that case, we still need to fsync the file to be - * sure the page is down to disk before we commit. - */ -int -smgrmarkdirty(int16 which, - Relation reln, - BlockNumber blkno) -{ - int status; - - status = (*(smgrsw[which].smgr_markdirty)) (reln, blkno); - - if (status == SM_FAIL) - elog(ERROR, "cannot mark block %d of %s: %m", - blkno, RelationGetRelationName(reln)); - - return status; -} - -/* - * smgrblindmarkdirty() -- Mark a page dirty, "blind". - * - * Just like smgrmarkdirty, except we don't have a reldesc. - */ -int -smgrblindmarkdirty(int16 which, - RelFileNode rnode, - BlockNumber blkno) -{ - int status; - - status = (*(smgrsw[which].smgr_blindmarkdirty)) (rnode, blkno); - - if (status == SM_FAIL) - elog(ERROR, "cannot mark block %d of %u/%u blind: %m", - blkno, rnode.tblNode, rnode.relNode); - - return status; -} - -/* * smgrnblocks() -- Calculate the number of POSTGRES blocks in the * supplied relation. * @@ -504,7 +439,7 @@ smgrDoPendingDeletes(bool isCommit) * any in the commit case, but there can be in the abort * case). */ - DropRelFileNodeBuffers(pending->relnode); + DropRelFileNodeBuffers(pending->relnode, pending->isTemp); /* * Tell the free space map to forget this relation. It won't @@ -531,11 +466,13 @@ smgrDoPendingDeletes(bool isCommit) } /* - * smgrcommit(), smgrabort() -- Commit or abort changes made during the - * current transaction. + * smgrcommit() -- Prepare to commit changes made during the current + * transaction. + * + * This is called before we actually commit. */ int -smgrcommit() +smgrcommit(void) { int i; @@ -553,8 +490,11 @@ smgrcommit() return SM_SUCCESS; } +/* + * smgrabort() -- Abort changes made during the current transaction. + */ int -smgrabort() +smgrabort(void) { int i; @@ -572,8 +512,11 @@ smgrabort() return SM_SUCCESS; } +/* + * Sync files to disk at checkpoint time. + */ int -smgrsync() +smgrsync(void) { int i; |