diff options
-rw-r--r-- | doc/src/sgml/monitoring.sgml | 4 | ||||
-rw-r--r-- | src/backend/postmaster/pgstat.c | 3 | ||||
-rw-r--r-- | src/backend/storage/file/buffile.c | 129 | ||||
-rw-r--r-- | src/backend/storage/file/fd.c | 9 | ||||
-rw-r--r-- | src/backend/storage/file/sharedfileset.c | 105 | ||||
-rw-r--r-- | src/backend/utils/sort/logtape.c | 4 | ||||
-rw-r--r-- | src/backend/utils/sort/sharedtuplestore.c | 2 | ||||
-rw-r--r-- | src/include/pgstat.h | 1 | ||||
-rw-r--r-- | src/include/storage/buffile.h | 4 | ||||
-rw-r--r-- | src/include/storage/fd.h | 2 | ||||
-rw-r--r-- | src/include/storage/sharedfileset.h | 4 |
11 files changed, 239 insertions, 28 deletions
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 0f11375c852..17a0df69784 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1203,6 +1203,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser <entry>Waiting for a write to a buffered file.</entry> </row> <row> + <entry><literal>BufFileTruncate</literal></entry> + <entry>Waiting for a buffered file to be truncated.</entry> + </row> + <row> <entry><literal>ControlFileRead</literal></entry> <entry>Waiting for a read from the <filename>pg_control</filename> file.</entry> diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 73ce944fb1c..8116b236143 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3940,6 +3940,9 @@ pgstat_get_wait_io(WaitEventIO w) case WAIT_EVENT_BUFFILE_WRITE: event_name = "BufFileWrite"; break; + case WAIT_EVENT_BUFFILE_TRUNCATE: + event_name = "BufFileTruncate"; + break; case WAIT_EVENT_CONTROL_FILE_READ: event_name = "ControlFileRead"; break; diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 2d7a0823208..d581f96eda9 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -32,10 +32,14 @@ * (by opening multiple fd.c temporary files). This is an essential feature * for sorts and hashjoins on large amounts of data. * - * BufFile supports temporary files that can be made read-only and shared with - * other backends, as infrastructure for parallel execution. Such files need - * to be created as a member of a SharedFileSet that all participants are - * attached to. + * BufFile supports temporary files that can be shared with other backends, as + * infrastructure for parallel execution. Such files need to be created as a + * member of a SharedFileSet that all participants are attached to. + * + * BufFile also supports temporary files that can be used by the single backend + * when the corresponding files need to be survived across the transaction and + * need to be opened and closed multiple times. Such files need to be created + * as a member of a SharedFileSet. *------------------------------------------------------------------------- */ @@ -277,7 +281,7 @@ BufFileCreateShared(SharedFileSet *fileset, const char *name) * backends and render it read-only. */ BufFile * -BufFileOpenShared(SharedFileSet *fileset, const char *name) +BufFileOpenShared(SharedFileSet *fileset, const char *name, int mode) { BufFile *file; char segment_name[MAXPGPATH]; @@ -301,7 +305,7 @@ BufFileOpenShared(SharedFileSet *fileset, const char *name) } /* Try to load a segment. */ SharedSegmentName(segment_name, name, nfiles); - files[nfiles] = SharedFileSetOpen(fileset, segment_name); + files[nfiles] = SharedFileSetOpen(fileset, segment_name, mode); if (files[nfiles] <= 0) break; ++nfiles; @@ -321,7 +325,7 @@ BufFileOpenShared(SharedFileSet *fileset, const char *name) file = makeBufFileCommon(nfiles); file->files = files; - file->readOnly = true; /* Can't write to files opened this way */ + file->readOnly = (mode == O_RDONLY) ? true : false; file->fileset = fileset; file->name = pstrdup(name); @@ -666,11 +670,21 @@ BufFileSeek(BufFile *file, int fileno, off_t offset, int whence) newFile = file->curFile; newOffset = (file->curOffset + file->pos) + offset; break; -#ifdef NOT_USED case SEEK_END: - /* could be implemented, not needed currently */ + + /* + * The file size of the last file gives us the end offset of that + * file. + */ + newFile = file->numFiles - 1; + newOffset = FileSize(file->files[file->numFiles - 1]); + if (newOffset < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m", + FilePathName(file->files[file->numFiles - 1]), + file->name))); break; -#endif default: elog(ERROR, "invalid whence: %d", whence); return EOF; @@ -838,3 +852,98 @@ BufFileAppend(BufFile *target, BufFile *source) return startBlock; } + +/* + * Truncate a BufFile created by BufFileCreateShared up to the given fileno and + * the offset. + */ +void +BufFileTruncateShared(BufFile *file, int fileno, off_t offset) +{ + int numFiles = file->numFiles; + int newFile = fileno; + off_t newOffset = file->curOffset; + char segment_name[MAXPGPATH]; + int i; + + /* + * Loop over all the files up to the given fileno and remove the files + * that are greater than the fileno and truncate the given file up to the + * offset. Note that we also remove the given fileno if the offset is 0 + * provided it is not the first file in which we truncate it. + */ + for (i = file->numFiles - 1; i >= fileno; i--) + { + if ((i != fileno || offset == 0) && i != 0) + { + SharedSegmentName(segment_name, file->name, i); + FileClose(file->files[i]); + if (!SharedFileSetDelete(file->fileset, segment_name, true)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not delete shared fileset \"%s\": %m", + segment_name))); + numFiles--; + newOffset = MAX_PHYSICAL_FILESIZE; + + /* + * This is required to indicate that we have deleted the given + * fileno. + */ + if (i == fileno) + newFile--; + } + else + { + if (FileTruncate(file->files[i], offset, + WAIT_EVENT_BUFFILE_TRUNCATE) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate file \"%s\": %m", + FilePathName(file->files[i])))); + newOffset = offset; + } + } + + file->numFiles = numFiles; + + /* + * If the truncate point is within existing buffer then we can just adjust + * pos within buffer. + */ + if (newFile == file->curFile && + newOffset >= file->curOffset && + newOffset <= file->curOffset + file->nbytes) + { + /* No need to reset the current pos if the new pos is greater. */ + if (newOffset <= file->curOffset + file->pos) + file->pos = (int) (newOffset - file->curOffset); + + /* Adjust the nbytes for the current buffer. */ + file->nbytes = (int) (newOffset - file->curOffset); + } + else if (newFile == file->curFile && + newOffset < file->curOffset) + { + /* + * The truncate point is within the existing file but prior to the + * current position, so we can forget the current buffer and reset the + * current position. + */ + file->curOffset = newOffset; + file->pos = 0; + file->nbytes = 0; + } + else if (newFile < file->curFile) + { + /* + * The truncate point is prior to the current file, so need to reset + * the current position accordingly. + */ + file->curFile = newFile; + file->curOffset = newOffset; + file->pos = 0; + file->nbytes = 0; + } + /* Nothing to do, if the truncate point is beyond current file. */ +} diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 5f6420efb2d..f376a97ed67 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -1743,18 +1743,17 @@ PathNameCreateTemporaryFile(const char *path, bool error_on_failure) /* * Open a file that was created with PathNameCreateTemporaryFile, possibly in * another backend. Files opened this way don't count against the - * temp_file_limit of the caller, are read-only and are automatically closed - * at the end of the transaction but are not deleted on close. + * temp_file_limit of the caller, are automatically closed at the end of the + * transaction but are not deleted on close. */ File -PathNameOpenTemporaryFile(const char *path) +PathNameOpenTemporaryFile(const char *path, int mode) { File file; ResourceOwnerEnlargeFiles(CurrentResourceOwner); - /* We open the file read-only. */ - file = PathNameOpenFile(path, O_RDONLY | PG_BINARY); + file = PathNameOpenFile(path, mode | PG_BINARY); /* If no such file, then we don't raise an error. */ if (file <= 0 && errno != ENOENT) diff --git a/src/backend/storage/file/sharedfileset.c b/src/backend/storage/file/sharedfileset.c index 16b7594756c..65fd8ff5c0c 100644 --- a/src/backend/storage/file/sharedfileset.c +++ b/src/backend/storage/file/sharedfileset.c @@ -13,6 +13,10 @@ * files can be discovered by name, and a shared ownership semantics so that * shared files survive until the last user detaches. * + * SharedFileSets can be used by backends when the temporary files need to be + * opened/closed multiple times and the underlying files need to survive across + * transactions. + * *------------------------------------------------------------------------- */ @@ -25,25 +29,36 @@ #include "common/hashfn.h" #include "miscadmin.h" #include "storage/dsm.h" +#include "storage/ipc.h" #include "storage/sharedfileset.h" #include "utils/builtins.h" +static List *filesetlist = NIL; + static void SharedFileSetOnDetach(dsm_segment *segment, Datum datum); +static void SharedFileSetDeleteOnProcExit(int status, Datum arg); static void SharedFileSetPath(char *path, SharedFileSet *fileset, Oid tablespace); static void SharedFilePath(char *path, SharedFileSet *fileset, const char *name); static Oid ChooseTablespace(const SharedFileSet *fileset, const char *name); /* - * Initialize a space for temporary files that can be opened for read-only - * access by other backends. Other backends must attach to it before - * accessing it. Associate this SharedFileSet with 'seg'. Any contained - * files will be deleted when the last backend detaches. + * Initialize a space for temporary files that can be opened by other backends. + * Other backends must attach to it before accessing it. Associate this + * SharedFileSet with 'seg'. Any contained files will be deleted when the + * last backend detaches. + * + * We can also use this interface if the temporary files are used only by + * single backend but the files need to be opened and closed multiple times + * and also the underlying files need to survive across transactions. For + * such cases, dsm segment 'seg' should be passed as NULL. Callers are + * expected to explicitly remove such files by using SharedFileSetDelete/ + * SharedFileSetDeleteAll or we remove such files on proc exit. * * Files will be distributed over the tablespaces configured in * temp_tablespaces. * * Under the covers the set is one or more directories which will eventually - * be deleted when there are no backends attached. + * be deleted. */ void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg) @@ -84,7 +99,25 @@ SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg) } /* Register our cleanup callback. */ - on_dsm_detach(seg, SharedFileSetOnDetach, PointerGetDatum(fileset)); + if (seg) + on_dsm_detach(seg, SharedFileSetOnDetach, PointerGetDatum(fileset)); + else + { + static bool registered_cleanup = false; + + if (!registered_cleanup) + { + /* + * We must not have registered any fileset before registering the + * fileset clean up. + */ + Assert(filesetlist == NIL); + on_proc_exit(SharedFileSetDeleteOnProcExit, 0); + registered_cleanup = true; + } + + filesetlist = lcons((void *) fileset, filesetlist); + } } /* @@ -147,13 +180,13 @@ SharedFileSetCreate(SharedFileSet *fileset, const char *name) * another backend. */ File -SharedFileSetOpen(SharedFileSet *fileset, const char *name) +SharedFileSetOpen(SharedFileSet *fileset, const char *name, int mode) { char path[MAXPGPATH]; File file; SharedFilePath(path, fileset, name); - file = PathNameOpenTemporaryFile(path); + file = PathNameOpenTemporaryFile(path, mode); return file; } @@ -192,6 +225,9 @@ SharedFileSetDeleteAll(SharedFileSet *fileset) SharedFileSetPath(dirpath, fileset, fileset->tablespaces[i]); PathNameDeleteTemporaryDir(dirpath); } + + /* Unregister the shared fileset */ + SharedFileSetUnregister(fileset); } /* @@ -223,6 +259,59 @@ SharedFileSetOnDetach(dsm_segment *segment, Datum datum) } /* + * Callback function that will be invoked on the process exit. This will + * process the list of all the registered sharedfilesets and delete the + * underlying files. + */ +static void +SharedFileSetDeleteOnProcExit(int status, Datum arg) +{ + ListCell *l; + + /* Loop over all the pending shared fileset entry */ + foreach(l, filesetlist) + { + SharedFileSet *fileset = (SharedFileSet *) lfirst(l); + + SharedFileSetDeleteAll(fileset); + } + + filesetlist = NIL; +} + +/* + * Unregister the shared fileset entry registered for cleanup on proc exit. + */ +void +SharedFileSetUnregister(SharedFileSet *input_fileset) +{ + bool found = false; + ListCell *l; + + /* + * If the caller is following the dsm based cleanup then we don't maintain + * the filesetlist so return. + */ + if (filesetlist == NIL) + return; + + foreach(l, filesetlist) + { + SharedFileSet *fileset = (SharedFileSet *) lfirst(l); + + /* Remove the entry from the list */ + if (input_fileset == fileset) + { + filesetlist = list_delete_cell(filesetlist, l); + found = true; + break; + } + } + + Assert(found); +} + +/* * Build the path for the directory holding the files backing a SharedFileSet * in a given tablespace. */ diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c index 5517e59c50f..788815cdab6 100644 --- a/src/backend/utils/sort/logtape.c +++ b/src/backend/utils/sort/logtape.c @@ -78,6 +78,8 @@ #include "postgres.h" +#include <fcntl.h> + #include "storage/buffile.h" #include "utils/builtins.h" #include "utils/logtape.h" @@ -551,7 +553,7 @@ ltsConcatWorkerTapes(LogicalTapeSet *lts, TapeShare *shared, lt = <s->tapes[i]; pg_itoa(i, filename); - file = BufFileOpenShared(fileset, filename); + file = BufFileOpenShared(fileset, filename, O_RDONLY); filesize = BufFileSize(file); /* diff --git a/src/backend/utils/sort/sharedtuplestore.c b/src/backend/utils/sort/sharedtuplestore.c index 6537a4303b1..b83fb50dac8 100644 --- a/src/backend/utils/sort/sharedtuplestore.c +++ b/src/backend/utils/sort/sharedtuplestore.c @@ -559,7 +559,7 @@ sts_parallel_scan_next(SharedTuplestoreAccessor *accessor, void *meta_data) sts_filename(name, accessor, accessor->read_participant); accessor->read_file = - BufFileOpenShared(accessor->fileset, name); + BufFileOpenShared(accessor->fileset, name, O_RDONLY); } /* Seek and load the chunk header. */ diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 13872013823..807a9c1edf6 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -916,6 +916,7 @@ typedef enum WAIT_EVENT_BASEBACKUP_READ = PG_WAIT_IO, WAIT_EVENT_BUFFILE_READ, WAIT_EVENT_BUFFILE_WRITE, + WAIT_EVENT_BUFFILE_TRUNCATE, WAIT_EVENT_CONTROL_FILE_READ, WAIT_EVENT_CONTROL_FILE_SYNC, WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE, diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h index f4752bab0da..fc34c49522d 100644 --- a/src/include/storage/buffile.h +++ b/src/include/storage/buffile.h @@ -48,7 +48,9 @@ extern long BufFileAppend(BufFile *target, BufFile *source); extern BufFile *BufFileCreateShared(SharedFileSet *fileset, const char *name); extern void BufFileExportShared(BufFile *file); -extern BufFile *BufFileOpenShared(SharedFileSet *fileset, const char *name); +extern BufFile *BufFileOpenShared(SharedFileSet *fileset, const char *name, + int mode); extern void BufFileDeleteShared(SharedFileSet *fileset, const char *name); +extern void BufFileTruncateShared(BufFile *file, int fileno, off_t offset); #endif /* BUFFILE_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 8cd125d7dfa..e209f047e85 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -94,7 +94,7 @@ extern mode_t FileGetRawMode(File file); /* Operations used for sharing named temporary files */ extern File PathNameCreateTemporaryFile(const char *name, bool error_on_failure); -extern File PathNameOpenTemporaryFile(const char *name); +extern File PathNameOpenTemporaryFile(const char *path, int mode); extern bool PathNameDeleteTemporaryFile(const char *name, bool error_on_failure); extern void PathNameCreateTemporaryDir(const char *base, const char *name); extern void PathNameDeleteTemporaryDir(const char *name); diff --git a/src/include/storage/sharedfileset.h b/src/include/storage/sharedfileset.h index 2d6cf077e51..d5edb600af9 100644 --- a/src/include/storage/sharedfileset.h +++ b/src/include/storage/sharedfileset.h @@ -37,9 +37,11 @@ typedef struct SharedFileSet extern void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg); extern void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg); extern File SharedFileSetCreate(SharedFileSet *fileset, const char *name); -extern File SharedFileSetOpen(SharedFileSet *fileset, const char *name); +extern File SharedFileSetOpen(SharedFileSet *fileset, const char *name, + int mode); extern bool SharedFileSetDelete(SharedFileSet *fileset, const char *name, bool error_on_failure); extern void SharedFileSetDeleteAll(SharedFileSet *fileset); +extern void SharedFileSetUnregister(SharedFileSet *input_fileset); #endif |