aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/smgr
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/smgr')
-rw-r--r--src/backend/storage/smgr/md.c166
-rw-r--r--src/backend/storage/smgr/mm.c35
-rw-r--r--src/backend/storage/smgr/smgr.c119
3 files changed, 57 insertions, 263 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 978d85d4868..25051a9799c 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.91 2002/06/20 20:29:35 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.92 2002/08/06 02:36:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -381,16 +381,7 @@ mdclose_fd(int fd)
/* if not closed already */
if (v->mdfd_vfd >= 0)
- {
- /*
- * We sync the file descriptor so that we don't need to reopen
- * it at transaction commit to force changes to disk. (This
- * is not really optional, because we are about to forget that
- * the file even exists...)
- */
- FileSync(v->mdfd_vfd);
FileClose(v->mdfd_vfd);
- }
/* Now free vector */
v = v->mdfd_chain;
if (ov != &Md_fdvec[fd])
@@ -403,16 +394,7 @@ mdclose_fd(int fd)
if (v != (MdfdVec *) NULL)
{
if (v->mdfd_vfd >= 0)
- {
- /*
- * We sync the file descriptor so that we don't need to reopen
- * it at transaction commit to force changes to disk. (This
- * is not really optional, because we are about to forget that
- * the file even exists...)
- */
- FileSync(v->mdfd_vfd);
FileClose(v->mdfd_vfd);
- }
}
#endif
@@ -498,55 +480,15 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
}
/*
- * mdflush() -- Synchronously write a block to disk.
- *
- * This is exactly like mdwrite(), but doesn't return until the file
- * system buffer cache has been flushed.
- */
-int
-mdflush(Relation reln, BlockNumber blocknum, char *buffer)
-{
- int status;
- long seekpos;
- MdfdVec *v;
-
- v = _mdfd_getseg(reln, blocknum);
-
-#ifndef LET_OS_MANAGE_FILESIZE
- seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
-#ifdef DIAGNOSTIC
- if (seekpos >= BLCKSZ * RELSEG_SIZE)
- elog(FATAL, "seekpos too big!");
-#endif
-#else
- seekpos = (long) (BLCKSZ * (blocknum));
-#endif
-
- if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
- return SM_FAIL;
-
- /* write and sync the block */
- status = SM_SUCCESS;
- if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ
- || FileSync(v->mdfd_vfd) < 0)
- status = SM_FAIL;
-
- return status;
-}
-
-/*
* mdblindwrt() -- Write a block to disk blind.
*
- * We have to be able to do this using only the name and OID of
- * the database and relation in which the block belongs. Otherwise
- * this is much like mdwrite(). If dofsync is TRUE, then we fsync
- * the file, making it more like mdflush().
+ * We have to be able to do this using only the rnode of the relation
+ * in which the block belongs. Otherwise this is much like mdwrite().
*/
int
mdblindwrt(RelFileNode rnode,
BlockNumber blkno,
- char *buffer,
- bool dofsync)
+ char *buffer)
{
int status;
long seekpos;
@@ -568,7 +510,6 @@ mdblindwrt(RelFileNode rnode,
#endif
errno = 0;
-
if (lseek(fd, seekpos, SEEK_SET) != seekpos)
{
elog(LOG, "mdblindwrt: lseek(%ld) failed: %m", seekpos);
@@ -578,7 +519,7 @@ mdblindwrt(RelFileNode rnode,
status = SM_SUCCESS;
- /* write and optionally sync the block */
+ /* write the block */
errno = 0;
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
{
@@ -599,54 +540,6 @@ mdblindwrt(RelFileNode rnode,
}
/*
- * mdmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
- *
- * Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mdmarkdirty(Relation reln, BlockNumber blkno)
-{
- MdfdVec *v;
-
- v = _mdfd_getseg(reln, blkno);
-
- FileMarkDirty(v->mdfd_vfd);
-
- return SM_SUCCESS;
-}
-
-/*
- * mdblindmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
- *
- * We have to be able to do this using only the name and OID of
- * the database and relation in which the block belongs. Otherwise
- * this is much like mdmarkdirty(). However, we do the fsync immediately
- * rather than building md/fd datastructures to postpone it till later.
- */
-int
-mdblindmarkdirty(RelFileNode rnode,
- BlockNumber blkno)
-{
- int status;
- int fd;
-
- fd = _mdfd_blind_getseg(rnode, blkno);
-
- if (fd < 0)
- return SM_FAIL;
-
- status = SM_SUCCESS;
-
- if (pg_fsync(fd) < 0)
- status = SM_FAIL;
-
- if (close(fd) < 0)
- status = SM_FAIL;
-
- return status;
-}
-
-/*
* mdnblocks() -- Get the number of blocks stored in a relation.
*
* Important side effect: all segments of the relation are opened
@@ -796,61 +689,36 @@ mdtruncate(Relation reln, BlockNumber nblocks)
/*
* mdcommit() -- Commit a transaction.
*
- * All changes to magnetic disk relations must be forced to stable
- * storage. This routine makes a pass over the private table of
- * file descriptors. Any descriptors to which we have done writes,
- * but not synced, are synced here.
- *
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
int
-mdcommit()
+mdcommit(void)
{
- int i;
- MdfdVec *v;
-
- for (i = 0; i < CurFd; i++)
- {
- v = &Md_fdvec[i];
- if (v->mdfd_flags & MDFD_FREE)
- continue;
- /* Sync the file entry */
-#ifndef LET_OS_MANAGE_FILESIZE
- for (; v != (MdfdVec *) NULL; v = v->mdfd_chain)
-#else
- if (v != (MdfdVec *) NULL)
-#endif
- {
- if (FileSync(v->mdfd_vfd) < 0)
- return SM_FAIL;
- }
- }
-
+ /*
+ * We don't actually have to do anything here...
+ */
return SM_SUCCESS;
}
/*
* mdabort() -- Abort a transaction.
*
- * Changes need not be forced to disk at transaction abort. We mark
- * all file descriptors as clean here. Always returns SM_SUCCESS.
+ * Changes need not be forced to disk at transaction abort.
*/
int
-mdabort()
+mdabort(void)
{
/*
- * We don't actually have to do anything here. fd.c will discard
- * fsync-needed bits in its AtEOXact_Files() routine.
+ * We don't actually have to do anything here...
*/
return SM_SUCCESS;
}
/*
- * mdsync() -- Sync storage.
- *
+ * mdsync() -- Sync previous writes to stable storage.
*/
int
-mdsync()
+mdsync(void)
{
sync();
if (IsUnderPostmaster)
@@ -861,11 +729,9 @@ mdsync()
/*
* _fdvec_alloc () -- grab a free (or new) md file descriptor vector.
- *
*/
-static
-int
-_fdvec_alloc()
+static int
+_fdvec_alloc(void)
{
MdfdVec *nvec;
int fdvec,
diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c
index 89396d173c9..739e938fe28 100644
--- a/src/backend/storage/smgr/mm.c
+++ b/src/backend/storage/smgr/mm.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.31 2002/06/20 20:29:36 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.32 2002/08/06 02:36:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -81,7 +81,7 @@ static HTAB *MMCacheHT;
static HTAB *MMRelCacheHT;
int
-mminit()
+mminit(void)
{
char *mmcacheblk;
int mmsize = 0;
@@ -151,7 +151,7 @@ mminit()
}
int
-mmshutdown()
+mmshutdown(void)
{
return SM_SUCCESS;
}
@@ -443,30 +443,15 @@ mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
}
/*
- * mmflush() -- Synchronously write a block to stable storage.
- *
- * For main-memory relations, this is exactly equivalent to mmwrite().
- */
-int
-mmflush(Relation reln, BlockNumber blocknum, char *buffer)
-{
- return mmwrite(reln, blocknum, buffer);
-}
-
-/*
* mmblindwrt() -- Write a block to stable storage blind.
*
- * We have to be able to do this using only the name and OID of
- * the database and relation in which the block belongs.
+ * We have to be able to do this using only the rnode of the relation
+ * in which the block belongs. Otherwise this is much like mmwrite().
*/
int
-mmblindwrt(char *dbstr,
- char *relstr,
- Oid dbid,
- Oid relid,
+mmblindwrt(RelFileNode rnode,
BlockNumber blkno,
- char *buffer,
- bool dofsync)
+ char *buffer)
{
return SM_FAIL;
}
@@ -512,7 +497,7 @@ mmnblocks(Relation reln)
* Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
int
-mmcommit()
+mmcommit(void)
{
return SM_SUCCESS;
}
@@ -522,7 +507,7 @@ mmcommit()
*/
int
-mmabort()
+mmabort(void)
{
return SM_SUCCESS;
}
@@ -536,7 +521,7 @@ mmabort()
* manager will use.
*/
int
-MMShmemSize()
+MMShmemSize(void)
{
int size = 0;
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index a7fb23b4427..252781d9c3f 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.57 2002/06/20 20:29:36 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.58 2002/08/06 02:36:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -40,12 +40,8 @@ typedef struct f_smgr
char *buffer);
int (*smgr_write) (Relation reln, BlockNumber blocknum,
char *buffer);
- int (*smgr_flush) (Relation reln, BlockNumber blocknum,
- char *buffer);
int (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno,
- char *buffer, bool dofsync);
- int (*smgr_markdirty) (Relation reln, BlockNumber blkno);
- int (*smgr_blindmarkdirty) (RelFileNode, BlockNumber blkno);
+ char *buffer);
BlockNumber (*smgr_nblocks) (Relation reln);
BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks);
int (*smgr_commit) (void); /* may be NULL */
@@ -62,15 +58,15 @@ static f_smgr smgrsw[] = {
/* magnetic disk */
{mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
- mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty,
+ mdread, mdwrite, mdblindwrt,
mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
},
#ifdef STABLE_MEMORY_STORAGE
/* main memory */
{mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
- mmread, mmwrite, mmflush, mmblindwrt, mmmarkdirty, mmblindmarkdirty,
- mmnblocks, NULL, mmcommit, mmabort},
+ mmread, mmwrite, mmblindwrt,
+ mmnblocks, NULL, mmcommit, mmabort, NULL},
#endif
};
@@ -110,6 +106,7 @@ typedef struct PendingRelDelete
{
RelFileNode relnode; /* relation that may need to be deleted */
int16 which; /* which storage manager? */
+ bool isTemp; /* is it a temporary relation? */
bool atCommit; /* T=delete at commit; F=delete at abort */
struct PendingRelDelete *next; /* linked-list link */
} PendingRelDelete;
@@ -123,7 +120,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
*
*/
int
-smgrinit()
+smgrinit(void)
{
int i;
@@ -181,6 +178,7 @@ smgrcreate(int16 which, Relation reln)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
pending->relnode = reln->rd_node;
pending->which = which;
+ pending->isTemp = reln->rd_istemp;
pending->atCommit = false; /* delete if abort */
pending->next = pendingDeletes;
pendingDeletes = pending;
@@ -208,6 +206,7 @@ smgrunlink(int16 which, Relation reln)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
pending->relnode = reln->rd_node;
pending->which = which;
+ pending->isTemp = reln->rd_istemp;
pending->atCommit = true; /* delete if commit */
pending->next = pendingDeletes;
pendingDeletes = pending;
@@ -312,8 +311,10 @@ smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
/*
* smgrwrite() -- Write the supplied buffer out.
*
- * This is not a synchronous write -- the interface for that is
- * smgrflush(). The buffer is written out via the appropriate
+ * This is not a synchronous write -- the block is not necessarily
+ * on disk at return, only dumped out to the kernel.
+ *
+ * The buffer is written out via the appropriate
* storage manager. This routine returns SM_SUCCESS or aborts
* the current transaction.
*/
@@ -332,23 +333,6 @@ smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
}
/*
- * smgrflush() -- A synchronous smgrwrite().
- */
-int
-smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
-{
- int status;
-
- status = (*(smgrsw[which].smgr_flush)) (reln, blocknum, buffer);
-
- if (status == SM_FAIL)
- elog(ERROR, "cannot flush block %d of %s to stable store: %m",
- blocknum, RelationGetRelationName(reln));
-
- return status;
-}
-
-/*
* smgrblindwrt() -- Write a page out blind.
*
* In some cases, we may find a page in the buffer cache that we
@@ -357,20 +341,18 @@ smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
* that has not yet committed, which created a new relation. In
* this case, the buffer manager will call smgrblindwrt() with
* the name and OID of the database and the relation to which the
- * buffer belongs. Every storage manager must be able to force
- * this page down to stable storage in this circumstance. The
- * write should be synchronous if dofsync is true.
+ * buffer belongs. Every storage manager must be able to write
+ * this page out to stable storage in this circumstance.
*/
int
smgrblindwrt(int16 which,
RelFileNode rnode,
BlockNumber blkno,
- char *buffer,
- bool dofsync)
+ char *buffer)
{
int status;
- status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer, dofsync);
+ status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer);
if (status == SM_FAIL)
elog(ERROR, "cannot write block %d of %u/%u blind: %m",
@@ -380,53 +362,6 @@ smgrblindwrt(int16 which,
}
/*
- * smgrmarkdirty() -- Mark a page dirty (needs fsync).
- *
- * Mark the specified page as needing to be fsync'd before commit.
- * Ordinarily, the storage manager will do this implicitly during
- * smgrwrite(). However, the buffer manager may discover that some
- * other backend has written a buffer that we dirtied in the current
- * transaction. In that case, we still need to fsync the file to be
- * sure the page is down to disk before we commit.
- */
-int
-smgrmarkdirty(int16 which,
- Relation reln,
- BlockNumber blkno)
-{
- int status;
-
- status = (*(smgrsw[which].smgr_markdirty)) (reln, blkno);
-
- if (status == SM_FAIL)
- elog(ERROR, "cannot mark block %d of %s: %m",
- blkno, RelationGetRelationName(reln));
-
- return status;
-}
-
-/*
- * smgrblindmarkdirty() -- Mark a page dirty, "blind".
- *
- * Just like smgrmarkdirty, except we don't have a reldesc.
- */
-int
-smgrblindmarkdirty(int16 which,
- RelFileNode rnode,
- BlockNumber blkno)
-{
- int status;
-
- status = (*(smgrsw[which].smgr_blindmarkdirty)) (rnode, blkno);
-
- if (status == SM_FAIL)
- elog(ERROR, "cannot mark block %d of %u/%u blind: %m",
- blkno, rnode.tblNode, rnode.relNode);
-
- return status;
-}
-
-/*
* smgrnblocks() -- Calculate the number of POSTGRES blocks in the
* supplied relation.
*
@@ -504,7 +439,7 @@ smgrDoPendingDeletes(bool isCommit)
* any in the commit case, but there can be in the abort
* case).
*/
- DropRelFileNodeBuffers(pending->relnode);
+ DropRelFileNodeBuffers(pending->relnode, pending->isTemp);
/*
* Tell the free space map to forget this relation. It won't
@@ -531,11 +466,13 @@ smgrDoPendingDeletes(bool isCommit)
}
/*
- * smgrcommit(), smgrabort() -- Commit or abort changes made during the
- * current transaction.
+ * smgrcommit() -- Prepare to commit changes made during the current
+ * transaction.
+ *
+ * This is called before we actually commit.
*/
int
-smgrcommit()
+smgrcommit(void)
{
int i;
@@ -553,8 +490,11 @@ smgrcommit()
return SM_SUCCESS;
}
+/*
+ * smgrabort() -- Abort changes made during the current transaction.
+ */
int
-smgrabort()
+smgrabort(void)
{
int i;
@@ -572,8 +512,11 @@ smgrabort()
return SM_SUCCESS;
}
+/*
+ * Sync files to disk at checkpoint time.
+ */
int
-smgrsync()
+smgrsync(void)
{
int i;