diff options
author | Andres Freund <andres@anarazel.de> | 2017-12-01 16:30:56 -0800 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2017-12-01 16:30:56 -0800 |
commit | dc6c4c9dc2a111519b76b22daaaac86c5608223b (patch) | |
tree | b9ac520d1aa9163ae52755ee2bc1a549df0c6a57 /src/backend/storage/file/buffile.c | |
parent | 35438e5763c3021e579472e4b0c4a4d6038570b4 (diff) | |
download | postgresql-dc6c4c9dc2a111519b76b22daaaac86c5608223b.tar.gz postgresql-dc6c4c9dc2a111519b76b22daaaac86c5608223b.zip |
Add infrastructure for sharing temporary files between backends.
SharedFileSet allows temporary files to be created by one backend and
then exported for read-only access by other backends, with clean-up
managed by reference counting associated with a DSM segment. This
includes changes to fd.c and buffile.c to support the new kind of
temporary file.
This will be used by an upcoming patch adding support for parallel
hash joins.
Author: Thomas Munro
Reviewed-By: Peter Geoghegan, Andres Freund, Robert Haas, Rushabh Lathia
Discussion:
https://postgr.es/m/CAEepm=2W=cOkiZxcg6qiFQP-dHUe09aqTrEMM7yJDrHMhDv_RA@mail.gmail.com
https://postgr.es/m/CAH2-WznJ_UgLux=_jTgCQ4yFz0iBntudsNKa1we3kN1BAG=88w@mail.gmail.com
Diffstat (limited to 'src/backend/storage/file/buffile.c')
-rw-r--r-- | src/backend/storage/file/buffile.c | 205 |
1 files changed, 204 insertions, 1 deletions
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 06bf2fadbf1..fa9940da9b3 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -31,12 +31,18 @@ * BufFile also supports temporary files that exceed the OS file size limit * (by opening multiple fd.c temporary files). This is an essential feature * for sorts and hashjoins on large amounts of data. + * + * BufFile supports temporary files that can be made read-only and shared with + * other backends, as infrastructure for parallel execution. Such files need + * to be created as a member of a SharedFileSet that all participants are + * attached to. *------------------------------------------------------------------------- */ #include "postgres.h" #include "executor/instrument.h" +#include "miscadmin.h" #include "pgstat.h" #include "storage/fd.h" #include "storage/buffile.h" @@ -70,6 +76,10 @@ struct BufFile bool isInterXact; /* keep open over transactions? */ bool dirty; /* does buffer need to be written? */ + bool readOnly; /* has the file been set to read only? */ + + SharedFileSet *fileset; /* space for segment files if shared */ + const char *name; /* name of this BufFile if shared */ /* * resowner is the ResourceOwner to use for underlying temp files. (We @@ -94,6 +104,7 @@ static void extendBufFile(BufFile *file); static void BufFileLoadBuffer(BufFile *file); static void BufFileDumpBuffer(BufFile *file); static int BufFileFlush(BufFile *file); +static File MakeNewSharedSegment(BufFile *file, int segment); /* @@ -117,6 +128,9 @@ makeBufFile(File firstfile) file->curOffset = 0L; file->pos = 0; file->nbytes = 0; + file->readOnly = false; + file->fileset = NULL; + file->name = NULL; return file; } @@ -134,7 +148,11 @@ extendBufFile(BufFile *file) oldowner = CurrentResourceOwner; CurrentResourceOwner = file->resowner; - pfile = OpenTemporaryFile(file->isInterXact); + if (file->fileset == NULL) + pfile = OpenTemporaryFile(file->isInterXact); + else + pfile = MakeNewSharedSegment(file, file->numFiles); + Assert(pfile >= 0); CurrentResourceOwner = oldowner; @@ -176,6 +194,189 @@ BufFileCreateTemp(bool interXact) } /* + * Build the name for a given segment of a given BufFile. + */ +static void +SharedSegmentName(char *name, const char *buffile_name, int segment) +{ + snprintf(name, MAXPGPATH, "%s.%d", buffile_name, segment); +} + +/* + * Create a new segment file backing a shared BufFile. + */ +static File +MakeNewSharedSegment(BufFile *buffile, int segment) +{ + char name[MAXPGPATH]; + File file; + + SharedSegmentName(name, buffile->name, segment); + file = SharedFileSetCreate(buffile->fileset, name); + + /* SharedFileSetCreate would've errored out */ + Assert(file > 0); + + return file; +} + +/* + * Create a BufFile that can be discovered and opened read-only by other + * backends that are attached to the same SharedFileSet using the same name. + * + * The naming scheme for shared BufFiles is left up to the calling code. The + * name will appear as part of one or more filenames on disk, and might + * provide clues to administrators about which subsystem is generating + * temporary file data. Since each SharedFileSet object is backed by one or + * more uniquely named temporary directory, names don't conflict with + * unrelated SharedFileSet objects. + */ +BufFile * +BufFileCreateShared(SharedFileSet *fileset, const char *name) +{ + BufFile *file; + + file = (BufFile *) palloc(sizeof(BufFile)); + file->fileset = fileset; + file->name = pstrdup(name); + file->numFiles = 1; + file->files = (File *) palloc(sizeof(File)); + file->files[0] = MakeNewSharedSegment(file, 0); + file->offsets = (off_t *) palloc(sizeof(off_t)); + file->offsets[0] = 0L; + file->isInterXact = false; + file->dirty = false; + file->resowner = CurrentResourceOwner; + file->curFile = 0; + file->curOffset = 0L; + file->pos = 0; + file->nbytes = 0; + file->readOnly = false; + file->name = pstrdup(name); + + return file; +} + +/* + * Open a file that was previously created in another backend (or this one) + * with BufFileCreateShared in the same SharedFileSet using the same name. + * The backend that created the file must have called BufFileClose() or + * BufFileExport() to make sure that it is ready to be opened by other + * backends and render it read-only. + */ +BufFile * +BufFileOpenShared(SharedFileSet *fileset, const char *name) +{ + BufFile *file = (BufFile *) palloc(sizeof(BufFile)); + char segment_name[MAXPGPATH]; + Size capacity = 16; + File *files = palloc(sizeof(File) * capacity); + int nfiles = 0; + + file = (BufFile *) palloc(sizeof(BufFile)); + files = palloc(sizeof(File) * capacity); + + /* + * We don't know how many segments there are, so we'll probe the + * filesystem to find out. + */ + for (;;) + { + /* See if we need to expand our file segment array. */ + if (nfiles + 1 > capacity) + { + capacity *= 2; + files = repalloc(files, sizeof(File) * capacity); + } + /* Try to load a segment. */ + SharedSegmentName(segment_name, name, nfiles); + files[nfiles] = SharedFileSetOpen(fileset, segment_name); + if (files[nfiles] <= 0) + break; + ++nfiles; + + CHECK_FOR_INTERRUPTS(); + } + + /* + * If we didn't find any files at all, then no BufFile exists with this + * name. + */ + if (nfiles == 0) + return NULL; + + file->numFiles = nfiles; + file->files = files; + file->offsets = (off_t *) palloc0(sizeof(off_t) * nfiles); + file->isInterXact = false; + file->dirty = false; + file->resowner = CurrentResourceOwner; /* Unused, can't extend */ + file->curFile = 0; + file->curOffset = 0L; + file->pos = 0; + file->nbytes = 0; + file->readOnly = true; /* Can't write to files opened this way */ + file->fileset = fileset; + file->name = pstrdup(name); + + return file; +} + +/* + * Delete a BufFile that was created by BufFileCreateShared in the given + * SharedFileSet using the given name. + * + * It is not necessary to delete files explicitly with this function. It is + * provided only as a way to delete files proactively, rather than waiting for + * the SharedFileSet to be cleaned up. + * + * Only one backend should attempt to delete a given name, and should know + * that it exists and has been exported or closed. + */ +void +BufFileDeleteShared(SharedFileSet *fileset, const char *name) +{ + char segment_name[MAXPGPATH]; + int segment = 0; + bool found = false; + + /* + * We don't know how many segments the file has. We'll keep deleting + * until we run out. If we don't manage to find even an initial segment, + * raise an error. + */ + for (;;) + { + SharedSegmentName(segment_name, name, segment); + if (!SharedFileSetDelete(fileset, segment_name, true)) + break; + found = true; + ++segment; + + CHECK_FOR_INTERRUPTS(); + } + + if (!found) + elog(ERROR, "could not delete unknown shared BufFile \"%s\"", name); +} + +/* + * BufFileExportShared --- flush and make read-only, in preparation for sharing. + */ +void +BufFileExportShared(BufFile *file) +{ + /* Must be a file belonging to a SharedFileSet. */ + Assert(file->fileset != NULL); + + /* It's probably a bug if someone calls this twice. */ + Assert(!file->readOnly); + + BufFileFlush(file); + file->readOnly = true; +} + +/* * Close a BufFile * * Like fclose(), this also implicitly FileCloses the underlying File. @@ -390,6 +591,8 @@ BufFileWrite(BufFile *file, void *ptr, size_t size) size_t nwritten = 0; size_t nthistime; + Assert(!file->readOnly); + while (size > 0) { if (file->pos >= BLCKSZ) |