diff options
Diffstat (limited to 'src/backend/storage/file/fd.c')
-rw-r--r-- | src/backend/storage/file/fd.c | 205 |
1 files changed, 25 insertions, 180 deletions
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 8dd51f17674..6611edbbd2c 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -16,8 +16,8 @@ * including base tables, scratch files (e.g., sort and hash spool * files), and random calls to C library routines like system(3); it * is quite easy to exceed system limits on the number of open files a - * single process can have. (This is around 256 on many modern - * operating systems, but can be as low as 32 on others.) + * single process can have. (This is around 1024 on many modern + * operating systems, but may be lower on others.) * * VFDs are managed as an LRU pool, with actual OS file descriptors * being opened and closed as needed. Obviously, if a routine is @@ -167,15 +167,6 @@ int max_safe_fds = 32; /* default if not changed */ #define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED) -/* - * Note: a VFD's seekPos is normally always valid, but if for some reason - * an lseek() fails, it might become set to FileUnknownPos. We can struggle - * along without knowing the seek position in many cases, but in some places - * we have to fail if we don't have it. - */ -#define FileUnknownPos ((off_t) -1) -#define FilePosIsUnknown(pos) ((pos) < 0) - /* these are the assigned bits in fdstate below: */ #define FD_DELETE_AT_CLOSE (1 << 0) /* T = delete when closed */ #define FD_CLOSE_AT_EOXACT (1 << 1) /* T = close at eoXact */ @@ -189,7 +180,6 @@ typedef struct vfd File nextFree; /* link to next free VFD, if in freelist */ File lruMoreRecently; /* doubly linked recency-of-use list */ File lruLessRecently; - off_t seekPos; /* current logical file position, or -1 */ off_t fileSize; /* current size of file (0 if not temporary) */ char *fileName; /* name of file, or NULL for unused VFD */ /* NB: fileName is malloc'd, and must be free'd when closing the VFD */ @@ -407,9 +397,7 @@ pg_fdatasync(int fd) /* * pg_flush_data --- advise OS that the described dirty data should be flushed * - * offset of 0 with nbytes 0 means that the entire file should be flushed; - * in this case, this function may have side-effects on the file's - * seek position! + * offset of 0 with nbytes 0 means that the entire file should be flushed */ void pg_flush_data(int fd, off_t offset, off_t nbytes) @@ -1030,22 +1018,6 @@ LruDelete(File file) vfdP = &VfdCache[file]; /* - * Normally we should know the seek position, but if for some reason we - * have lost track of it, try again to get it. If we still can't get it, - * we have a problem: we will be unable to restore the file seek position - * when and if the file is re-opened. But we can't really throw an error - * and refuse to close the file, or activities such as transaction cleanup - * will be broken. - */ - if (FilePosIsUnknown(vfdP->seekPos)) - { - vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR); - if (FilePosIsUnknown(vfdP->seekPos)) - elog(LOG, "could not seek file \"%s\" before closing: %m", - vfdP->fileName); - } - - /* * Close the file. We aren't expecting this to fail; if it does, better * to leak the FD than to mess up our internal state. */ @@ -1113,33 +1085,6 @@ LruInsert(File file) { ++nfile; } - - /* - * Seek to the right position. We need no special case for seekPos - * equal to FileUnknownPos, as lseek() will certainly reject that - * (thus completing the logic noted in LruDelete() that we will fail - * to re-open a file if we couldn't get its seek position before - * closing). - */ - if (vfdP->seekPos != (off_t) 0) - { - if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0) - { - /* - * If we fail to restore the seek position, treat it like an - * open() failure. - */ - int save_errno = errno; - - elog(LOG, "could not seek file \"%s\" after re-opening: %m", - vfdP->fileName); - (void) close(vfdP->fd); - vfdP->fd = VFD_CLOSED; - --nfile; - errno = save_errno; - return -1; - } - } } /* @@ -1406,7 +1351,6 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode) /* Saved flags are adjusted to be OK for re-opening file */ vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL); vfdP->fileMode = fileMode; - vfdP->seekPos = 0; vfdP->fileSize = 0; vfdP->fdstate = 0x0; vfdP->resowner = NULL; @@ -1820,7 +1764,6 @@ FileClose(File file) /* * FilePrefetch - initiate asynchronous read of a given range of the file. - * The logical seek position is unaffected. * * Currently the only implementation of this function is using posix_fadvise * which is the simplest standardized interface that accomplishes this. @@ -1867,10 +1810,6 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info) file, VfdCache[file].fileName, (int64) offset, (int64) nbytes)); - /* - * Caution: do not call pg_flush_data with nbytes = 0, it could trash the - * file's seek position. We prefer to define that as a no-op here. - */ if (nbytes <= 0) return; @@ -1884,7 +1823,8 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info) } int -FileRead(File file, char *buffer, int amount, uint32 wait_event_info) +FileRead(File file, char *buffer, int amount, off_t offset, + uint32 wait_event_info) { int returnCode; Vfd *vfdP; @@ -1893,7 +1833,7 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info) DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %d %p", file, VfdCache[file].fileName, - (int64) VfdCache[file].seekPos, + (int64) offset, amount, buffer)); returnCode = FileAccess(file); @@ -1904,16 +1844,10 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info) retry: pgstat_report_wait_start(wait_event_info); - returnCode = read(vfdP->fd, buffer, amount); + returnCode = pg_pread(vfdP->fd, buffer, amount, offset); pgstat_report_wait_end(); - if (returnCode >= 0) - { - /* if seekPos is unknown, leave it that way */ - if (!FilePosIsUnknown(vfdP->seekPos)) - vfdP->seekPos += returnCode; - } - else + if (returnCode < 0) { /* * Windows may run out of kernel buffers and return "Insufficient @@ -1939,16 +1873,14 @@ retry: /* OK to retry if interrupted */ if (errno == EINTR) goto retry; - - /* Trouble, so assume we don't know the file position anymore */ - vfdP->seekPos = FileUnknownPos; } return returnCode; } int -FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) +FileWrite(File file, char *buffer, int amount, off_t offset, + uint32 wait_event_info) { int returnCode; Vfd *vfdP; @@ -1957,7 +1889,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %d %p", file, VfdCache[file].fileName, - (int64) VfdCache[file].seekPos, + (int64) offset, amount, buffer)); returnCode = FileAccess(file); @@ -1976,26 +1908,13 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) */ if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT)) { - off_t newPos; + off_t past_write = offset + amount; - /* - * Normally we should know the seek position, but if for some reason - * we have lost track of it, try again to get it. Here, it's fine to - * throw an error if we still can't get it. - */ - if (FilePosIsUnknown(vfdP->seekPos)) - { - vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR); - if (FilePosIsUnknown(vfdP->seekPos)) - elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName); - } - - newPos = vfdP->seekPos + amount; - if (newPos > vfdP->fileSize) + if (past_write > vfdP->fileSize) { uint64 newTotal = temporary_files_size; - newTotal += newPos - vfdP->fileSize; + newTotal += past_write - vfdP->fileSize; if (newTotal > (uint64) temp_file_limit * (uint64) 1024) ereport(ERROR, (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), @@ -2007,7 +1926,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) retry: errno = 0; pgstat_report_wait_start(wait_event_info); - returnCode = write(vfdP->fd, buffer, amount); + returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset); pgstat_report_wait_end(); /* if write didn't set errno, assume problem is no disk space */ @@ -2016,10 +1935,6 @@ retry: if (returnCode >= 0) { - /* if seekPos is unknown, leave it that way */ - if (!FilePosIsUnknown(vfdP->seekPos)) - vfdP->seekPos += returnCode; - /* * Maintain fileSize and temporary_files_size if it's a temp file. * @@ -2029,12 +1944,12 @@ retry: */ if (vfdP->fdstate & FD_TEMP_FILE_LIMIT) { - off_t newPos = vfdP->seekPos; + off_t past_write = offset + amount; - if (newPos > vfdP->fileSize) + if (past_write > vfdP->fileSize) { - temporary_files_size += newPos - vfdP->fileSize; - vfdP->fileSize = newPos; + temporary_files_size += past_write - vfdP->fileSize; + vfdP->fileSize = past_write; } } } @@ -2060,9 +1975,6 @@ retry: /* OK to retry if interrupted */ if (errno == EINTR) goto retry; - - /* Trouble, so assume we don't know the file position anymore */ - vfdP->seekPos = FileUnknownPos; } return returnCode; @@ -2090,92 +2002,25 @@ FileSync(File file, uint32 wait_event_info) } off_t -FileSeek(File file, off_t offset, int whence) +FileSize(File file) { Vfd *vfdP; Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileSeek: %d (%s) " INT64_FORMAT " " INT64_FORMAT " %d", - file, VfdCache[file].fileName, - (int64) VfdCache[file].seekPos, - (int64) offset, whence)); + DO_DB(elog(LOG, "FileSize %d (%s)", + file, VfdCache[file].fileName)); vfdP = &VfdCache[file]; if (FileIsNotOpen(file)) { - switch (whence) - { - case SEEK_SET: - if (offset < 0) - { - errno = EINVAL; - return (off_t) -1; - } - vfdP->seekPos = offset; - break; - case SEEK_CUR: - if (FilePosIsUnknown(vfdP->seekPos) || - vfdP->seekPos + offset < 0) - { - errno = EINVAL; - return (off_t) -1; - } - vfdP->seekPos += offset; - break; - case SEEK_END: - if (FileAccess(file) < 0) - return (off_t) -1; - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - break; - default: - elog(ERROR, "invalid whence: %d", whence); - break; - } + if (FileAccess(file) < 0) + return (off_t) -1; } - else - { - switch (whence) - { - case SEEK_SET: - if (offset < 0) - { - errno = EINVAL; - return (off_t) -1; - } - if (vfdP->seekPos != offset) - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - break; - case SEEK_CUR: - if (offset != 0 || FilePosIsUnknown(vfdP->seekPos)) - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - break; - case SEEK_END: - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - break; - default: - elog(ERROR, "invalid whence: %d", whence); - break; - } - } - - return vfdP->seekPos; -} -/* - * XXX not actually used but here for completeness - */ -#ifdef NOT_USED -off_t -FileTell(File file) -{ - Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileTell %d (%s)", - file, VfdCache[file].fileName)); - return VfdCache[file].seekPos; + return lseek(VfdCache[file].fd, 0, SEEK_END); } -#endif int FileTruncate(File file, off_t offset, uint32 wait_event_info) |