diff options
Diffstat (limited to 'src/backend/storage')
-rw-r--r-- | src/backend/storage/file/fd.c | 209 | ||||
-rw-r--r-- | src/backend/storage/large_object/inv_api.c | 122 | ||||
-rw-r--r-- | src/backend/storage/lmgr/lmgr.c | 16 |
3 files changed, 244 insertions, 103 deletions
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 96de54110cf..918d541e2a7 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.109 2004/05/31 03:48:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.110 2004/07/28 14:23:28 tgl Exp $ * * NOTES: * @@ -47,6 +47,7 @@ #include <fcntl.h> #include "miscadmin.h" +#include "access/xact.h" #include "storage/fd.h" #include "storage/ipc.h" @@ -122,6 +123,7 @@ typedef struct vfd { signed short fd; /* current FD, or VFD_CLOSED if none */ unsigned short fdstate; /* bitflags for VFD's state */ + TransactionId create_xid; /* for XACT_TEMPORARY fds, creating Xid */ File nextFree; /* link to next free VFD, if in freelist */ File lruMoreRecently; /* doubly linked recency-of-use list */ File lruLessRecently; @@ -146,27 +148,31 @@ static Size SizeVfdCache = 0; static int nfile = 0; /* - * List of stdio FILEs opened with AllocateFile. + * List of stdio FILEs and <dirent.h> DIRs opened with AllocateFile + * and AllocateDir. * - * Since we don't want to encourage heavy use of AllocateFile, it seems - * OK to put a pretty small maximum limit on the number of simultaneously - * allocated files. + * Since we don't want to encourage heavy use of AllocateFile or AllocateDir, + * it seems OK to put a pretty small maximum limit on the number of + * simultaneously allocated descs. */ -#define MAX_ALLOCATED_FILES 32 +#define MAX_ALLOCATED_DESCS 32 -static int numAllocatedFiles = 0; -static FILE *allocatedFiles[MAX_ALLOCATED_FILES]; +typedef enum { + AllocateDescFile, + AllocateDescDir +} AllocateDescKind; -/* - * List of <dirent.h> DIRs opened with AllocateDir. - * - * Since we don't have heavy use of AllocateDir, it seems OK to put a pretty - * small maximum limit on the number of simultaneously allocated dirs. 
- */ -#define MAX_ALLOCATED_DIRS 10 +typedef struct { + AllocateDescKind kind; + union { + FILE *file; + DIR *dir; + } desc; + TransactionId create_xid; +} AllocateDesc; -static int numAllocatedDirs = 0; -static DIR *allocatedDirs[MAX_ALLOCATED_DIRS]; +static int numAllocatedDescs = 0; +static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS]; /* * Number of temporary files opened during the current session; @@ -499,7 +505,7 @@ LruInsert(File file) if (FileIsNotOpen(file)) { - while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds) + while (nfile + numAllocatedDescs >= max_safe_fds) { if (!ReleaseLruFile()) break; @@ -759,7 +765,7 @@ fileNameOpenFile(FileName fileName, file = AllocateVfd(); vfdP = &VfdCache[file]; - while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds) + while (nfile + numAllocatedDescs >= max_safe_fds) { if (!ReleaseLruFile()) break; @@ -876,7 +882,10 @@ OpenTemporaryFile(bool interXact) /* Mark it for deletion at EOXact */ if (!interXact) + { VfdCache[file].fdstate |= FD_XACT_TEMPORARY; + VfdCache[file].create_xid = GetCurrentTransactionId(); + } return file; } @@ -1134,24 +1143,29 @@ AllocateFile(char *name, char *mode) { FILE *file; - DO_DB(elog(LOG, "AllocateFile: Allocated %d", numAllocatedFiles)); + DO_DB(elog(LOG, "AllocateFile: Allocated %d (%s)", + numAllocatedDescs, name)); /* - * The test against MAX_ALLOCATED_FILES prevents us from overflowing + * The test against MAX_ALLOCATED_DESCS prevents us from overflowing * allocatedFiles[]; the test against max_safe_fds prevents AllocateFile * from hogging every one of the available FDs, which'd lead to infinite * looping. 
*/ - if (numAllocatedFiles >= MAX_ALLOCATED_FILES || - numAllocatedFiles + numAllocatedDirs >= max_safe_fds - 1) + if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || + numAllocatedDescs >= max_safe_fds - 1) elog(ERROR, "too many private files demanded"); TryAgain: if ((file = fopen(name, mode)) != NULL) { - allocatedFiles[numAllocatedFiles] = file; - numAllocatedFiles++; - return file; + AllocateDesc *desc = &allocatedDescs[numAllocatedDescs]; + + desc->kind = AllocateDescFile; + desc->desc.file = file; + desc->create_xid = GetCurrentTransactionId(); + numAllocatedDescs++; + return desc->desc.file; } if (errno == EMFILE || errno == ENFILE) @@ -1171,6 +1185,38 @@ TryAgain: } /* + * Free an AllocateDesc of either type. + * + * The argument *must* point into the allocatedDescs[] array. + */ +static int +FreeDesc(AllocateDesc *desc) +{ + int result; + + /* Close the underlying object */ + switch (desc->kind) + { + case AllocateDescFile: + result = fclose(desc->desc.file); + break; + case AllocateDescDir: + result = closedir(desc->desc.dir); + break; + default: + elog(ERROR, "AllocateDesc kind not recognized"); + result = 0; /* keep compiler quiet */ + break; + } + + /* Compact storage in the allocatedDescs array */ + numAllocatedDescs--; + *desc = allocatedDescs[numAllocatedDescs]; + + return result; +} + +/* * Close a file returned by AllocateFile. 
* * Note we do not check fclose's return value --- it is up to the caller @@ -1181,20 +1227,19 @@ FreeFile(FILE *file) { int i; - DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedFiles)); + DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedDescs)); /* Remove file from list of allocated files, if it's present */ - for (i = numAllocatedFiles; --i >= 0;) + for (i = numAllocatedDescs; --i >= 0;) { - if (allocatedFiles[i] == file) - { - numAllocatedFiles--; - allocatedFiles[i] = allocatedFiles[numAllocatedFiles]; - break; - } + AllocateDesc *desc = &allocatedDescs[i]; + + if (desc->kind == AllocateDescFile && desc->desc.file == file) + return FreeDesc(desc); } - if (i < 0) - elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile"); + + /* Only get here if someone passes us a file not in allocatedDescs */ + elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile"); return fclose(file); } @@ -1213,24 +1258,29 @@ AllocateDir(const char *dirname) { DIR *dir; - DO_DB(elog(LOG, "AllocateDir: Allocated %d", numAllocatedDirs)); + DO_DB(elog(LOG, "AllocateDir: Allocated %d (%s)", + numAllocatedDescs, dirname)); /* - * The test against MAX_ALLOCATED_DIRS prevents us from overflowing - * allocatedDirs[]; the test against max_safe_fds prevents AllocateDir + * The test against MAX_ALLOCATED_DESCS prevents us from overflowing + * allocatedDescs[]; the test against max_safe_fds prevents AllocateDir * from hogging every one of the available FDs, which'd lead to infinite * looping. 
*/ - if (numAllocatedDirs >= MAX_ALLOCATED_DIRS || - numAllocatedDirs + numAllocatedFiles >= max_safe_fds - 1) + if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || + numAllocatedDescs >= max_safe_fds - 1) elog(ERROR, "too many private dirs demanded"); TryAgain: if ((dir = opendir(dirname)) != NULL) { - allocatedDirs[numAllocatedDirs] = dir; - numAllocatedDirs++; - return dir; + AllocateDesc *desc = &allocatedDescs[numAllocatedDescs]; + + desc->kind = AllocateDescDir; + desc->desc.dir = dir; + desc->create_xid = GetCurrentTransactionId(); + numAllocatedDescs++; + return desc->desc.dir; } if (errno == EMFILE || errno == ENFILE) @@ -1260,20 +1310,19 @@ FreeDir(DIR *dir) { int i; - DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDirs)); + DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDescs)); /* Remove dir from list of allocated dirs, if it's present */ - for (i = numAllocatedDirs; --i >= 0;) + for (i = numAllocatedDescs; --i >= 0;) { - if (allocatedDirs[i] == dir) - { - numAllocatedDirs--; - allocatedDirs[i] = allocatedDirs[numAllocatedDirs]; - break; - } + AllocateDesc *desc = &allocatedDescs[i]; + + if (desc->kind == AllocateDescDir && desc->desc.dir == dir) + return FreeDesc(desc); } - if (i < 0) - elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir"); + + /* Only get here if someone passes us a dir not in allocatedDescs */ + elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir"); return closedir(dir); } @@ -1303,6 +1352,51 @@ closeAllVfds(void) } /* + * AtEOSubXact_Files + * + * Take care of subtransaction commit/abort. At abort, we close temp files + * that the subtransaction may have opened. At commit, we reassign the + * files that were opened to the parent transaction. 
+ */ +void +AtEOSubXact_Files(bool isCommit, TransactionId myXid, TransactionId parentXid) +{ + Index i; + + if (SizeVfdCache > 0) + { + Assert(FileIsNotOpen(0)); /* Make sure ring not corrupted */ + for (i = 1; i < SizeVfdCache; i++) + { + unsigned short fdstate = VfdCache[i].fdstate; + + if ((fdstate & FD_XACT_TEMPORARY) && + VfdCache[i].create_xid == myXid) + { + if (isCommit) + VfdCache[i].create_xid = parentXid; + else if (VfdCache[i].fileName != NULL) + FileClose(i); + } + } + } + + for (i = 0; i < numAllocatedDescs; i++) + { + if (allocatedDescs[i].create_xid == myXid) + { + if (isCommit) + allocatedDescs[i].create_xid = parentXid; + else + { + /* have to recheck the item after FreeDesc (ugly) */ + FreeDesc(&allocatedDescs[i--]); + } + } + } +} + +/* * AtEOXact_Files * * This routine is called during transaction commit or abort (it doesn't @@ -1362,11 +1456,8 @@ CleanupTempFiles(bool isProcExit) } } - while (numAllocatedFiles > 0) - FreeFile(allocatedFiles[0]); - - while (numAllocatedDirs > 0) - FreeDir(allocatedDirs[0]); + while (numAllocatedDescs > 0) + FreeDesc(&allocatedDescs[0]); } diff --git a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c index 5f75e06e189..470dcf11aa9 100644 --- a/src/backend/storage/large_object/inv_api.c +++ b/src/backend/storage/large_object/inv_api.c @@ -9,36 +9,92 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.102 2003/11/29 19:51:56 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.103 2004/07/28 14:23:29 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include <errno.h> -#include <sys/file.h> -#include <sys/stat.h> - #include "access/genam.h" #include "access/heapam.h" -#include "access/htup.h" #include "access/tuptoaster.h" #include "catalog/catalog.h" #include "catalog/catname.h" -#include "catalog/heap.h" -#include "catalog/index.h" #include 
"catalog/indexing.h" -#include "catalog/pg_opclass.h" #include "catalog/pg_largeobject.h" -#include "catalog/pg_type.h" #include "commands/comment.h" #include "libpq/libpq-fs.h" -#include "miscadmin.h" #include "storage/large_object.h" -#include "storage/smgr.h" -#include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" +#include "utils/resowner.h" + + +/* + * All accesses to pg_largeobject and its index make use of a single Relation + * reference, so that we only need to open pg_largeobject once per transaction. + * To avoid problems when the first such reference occurs inside a + * subtransaction, we execute a slightly klugy maneuver to assign ownership of + * the Relation reference to TopTransactionResourceOwner. + */ +static Relation lo_heap_r = NULL; +static Relation lo_index_r = NULL; + + +/* + * Open pg_largeobject and its index, if not already done in current xact + */ +static void +open_lo_relation(void) +{ + ResourceOwner currentOwner; + + if (lo_heap_r && lo_index_r) + return; /* already open in current xact */ + + /* Arrange for the top xact to own these relation references */ + currentOwner = CurrentResourceOwner; + CurrentResourceOwner = TopTransactionResourceOwner; + + /* Use RowExclusiveLock since we might either read or write */ + if (lo_heap_r == NULL) + lo_heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock); + if (lo_index_r == NULL) + lo_index_r = index_openr(LargeObjectLOidPNIndex); + + CurrentResourceOwner = currentOwner; +} + +/* + * Clean up at main transaction end + */ +void +close_lo_relation(bool isCommit) +{ + if (lo_heap_r || lo_index_r) + { + /* + * Only bother to close if committing; else abort cleanup will + * handle it + */ + if (isCommit) + { + ResourceOwner currentOwner; + + currentOwner = CurrentResourceOwner; + CurrentResourceOwner = TopTransactionResourceOwner; + + if (lo_index_r) + index_close(lo_index_r); + if (lo_heap_r) + heap_close(lo_heap_r, NoLock); + + CurrentResourceOwner = 
currentOwner; + } + lo_heap_r = NULL; + lo_index_r = NULL; + } +} static int32 @@ -50,6 +106,7 @@ getbytealen(bytea *data) return (VARSIZE(data) - VARHDRSZ); } + /* * inv_create -- create a new large object. * @@ -92,23 +149,20 @@ inv_create(int flags) retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); retval->id = file_oid; + retval->xid = GetCurrentTransactionId(); retval->offset = 0; if (flags & INV_WRITE) { retval->flags = IFS_WRLOCK | IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock); } else if (flags & INV_READ) { retval->flags = IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock); } else elog(ERROR, "invalid flags: %d", flags); - retval->index_r = index_openr(LargeObjectLOidPNIndex); - return retval; } @@ -131,23 +185,20 @@ inv_open(Oid lobjId, int flags) retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc)); retval->id = lobjId; + retval->xid = GetCurrentTransactionId(); retval->offset = 0; if (flags & INV_WRITE) { retval->flags = IFS_WRLOCK | IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock); } else if (flags & INV_READ) { retval->flags = IFS_RDLOCK; - retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock); } else elog(ERROR, "invalid flags: %d", flags); - retval->index_r = index_openr(LargeObjectLOidPNIndex); - return retval; } @@ -158,13 +209,6 @@ void inv_close(LargeObjectDesc *obj_desc) { Assert(PointerIsValid(obj_desc)); - - if (obj_desc->flags & IFS_WRLOCK) - heap_close(obj_desc->heap_r, RowExclusiveLock); - else if (obj_desc->flags & IFS_RDLOCK) - heap_close(obj_desc->heap_r, AccessShareLock); - index_close(obj_desc->index_r); - pfree(obj_desc); } @@ -212,12 +256,14 @@ inv_getsize(LargeObjectDesc *obj_desc) Assert(PointerIsValid(obj_desc)); + open_lo_relation(); + ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(obj_desc->id)); - sd = index_beginscan(obj_desc->heap_r, 
obj_desc->index_r, + sd = index_beginscan(lo_heap_r, lo_index_r, SnapshotNow, 1, skey); /* @@ -316,6 +362,8 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes) if (nbytes <= 0) return 0; + open_lo_relation(); + ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, BTEqualStrategyNumber, F_OIDEQ, @@ -326,7 +374,7 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes) BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); - sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r, + sd = index_beginscan(lo_heap_r, lo_index_r, SnapshotNow, 2, skey); while ((tuple = index_getnext(sd, ForwardScanDirection)) != NULL) @@ -421,7 +469,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) if (nbytes <= 0) return 0; - indstate = CatalogOpenIndexes(obj_desc->heap_r); + open_lo_relation(); + + indstate = CatalogOpenIndexes(lo_heap_r); ScanKeyInit(&skey[0], Anum_pg_largeobject_loid, @@ -433,7 +483,7 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(pageno)); - sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r, + sd = index_beginscan(lo_heap_r, lo_index_r, SnapshotNow, 2, skey); oldtuple = NULL; @@ -510,9 +560,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) memset(replace, ' ', sizeof(replace)); values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf); replace[Anum_pg_largeobject_data - 1] = 'r'; - newtup = heap_modifytuple(oldtuple, obj_desc->heap_r, + newtup = heap_modifytuple(oldtuple, lo_heap_r, values, nulls, replace); - simple_heap_update(obj_desc->heap_r, &newtup->t_self, newtup); + simple_heap_update(lo_heap_r, &newtup->t_self, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); @@ -554,8 +604,8 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes) values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id); values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno); values[Anum_pg_largeobject_data - 1] 
= PointerGetDatum(&workbuf); - newtup = heap_formtuple(obj_desc->heap_r->rd_att, values, nulls); - simple_heap_insert(obj_desc->heap_r, newtup); + newtup = heap_formtuple(lo_heap_r->rd_att, values, nulls); + simple_heap_insert(lo_heap_r, newtup); CatalogIndexInsert(indstate, newtup); heap_freetuple(newtup); } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 176767507c2..11d73c58300 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.65 2004/07/27 05:10:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.66 2004/07/28 14:23:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -137,7 +137,7 @@ LockRelation(Relation relation, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = InvalidBlockNumber; - if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, false)) elog(ERROR, "LockAcquire failed"); @@ -171,7 +171,7 @@ ConditionalLockRelation(Relation relation, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = InvalidBlockNumber; - if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, true)) return false; @@ -201,7 +201,7 @@ UnlockRelation(Relation relation, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = InvalidBlockNumber; - LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode); + LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode); } /* @@ -264,7 +264,7 @@ LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = blkno; - if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + 
if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, false)) elog(ERROR, "LockAcquire failed"); } @@ -285,7 +285,7 @@ ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = blkno; - return LockAcquire(LockTableId, &tag, GetCurrentTransactionId(), + return LockAcquire(LockTableId, &tag, GetTopTransactionId(), lockmode, true); } @@ -302,7 +302,7 @@ UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode) tag.dbId = relation->rd_lockInfo.lockRelId.dbId; tag.objId.blkno = blkno; - LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode); + LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode); } /* @@ -343,7 +343,7 @@ void XactLockTableWait(TransactionId xid) { LOCKTAG tag; - TransactionId myxid = GetCurrentTransactionId(); + TransactionId myxid = GetTopTransactionId(); Assert(!SubTransXidsHaveCommonAncestor(xid, myxid)); |