diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2013-02-27 18:17:21 +0200 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2013-02-27 18:22:31 +0200 |
commit | 3d009e45bde2a2681826ef549637ada76508b597 (patch) | |
tree | 6f429ba5f7bbfee65dfd14fcfacd19a2e0ddd053 /src/backend | |
parent | 73dc003beef859e0b67da463c5e28f5468d3f17f (diff) | |
download | postgresql-3d009e45bde2a2681826ef549637ada76508b597.tar.gz postgresql-3d009e45bde2a2681826ef549637ada76508b597.zip |
Add support for piping COPY to/from an external program.
This includes backend "COPY TO/FROM PROGRAM '...'" syntax, and corresponding
psql \copy syntax. Like with reading/writing files, the backend version is
superuser-only, and in the psql version, the program is run in the client.
In the passing, the psql \copy STDIN/STDOUT syntax is subtly changed: if you
the stdin/stdout is quoted, it's now interpreted as a filename. For example,
"\copy foo from 'stdin'" now reads from a file called 'stdin', not from
standard input. Before this, there was no way to specify a filename called
stdin, stdout, pstdin or pstdout.
This creates a new function in pgport, wait_result_to_str(), which can
be used to convert the exit status of a process, as returned by wait(3),
to a human-readable string.
Etsuro Fujita, reviewed by Amit Kapila.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/commands/copy.c | 205 | ||||
-rw-r--r-- | src/backend/nodes/copyfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/nodes/equalfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/parser/gram.y | 48 | ||||
-rw-r--r-- | src/backend/storage/file/fd.c | 98 |
5 files changed, 279 insertions, 74 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 523c1e03315..c651ea30280 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -58,7 +58,7 @@ */ typedef enum CopyDest { - COPY_FILE, /* to/from file */ + COPY_FILE, /* to/from file (or a piped program) */ COPY_OLD_FE, /* to/from frontend (2.0 protocol) */ COPY_NEW_FE /* to/from frontend (3.0 protocol) */ } CopyDest; @@ -108,6 +108,7 @@ typedef struct CopyStateData QueryDesc *queryDesc; /* executable query to copy from */ List *attnumlist; /* integer list of attnums to copy */ char *filename; /* filename, or NULL for STDIN/STDOUT */ + bool is_program; /* is 'filename' a program to popen? */ bool binary; /* binary format? */ bool oids; /* include OIDs? */ bool freeze; /* freeze rows on loading? */ @@ -277,8 +278,10 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; static CopyState BeginCopy(bool is_from, Relation rel, Node *raw_query, const char *queryString, List *attnamelist, List *options); static void EndCopy(CopyState cstate); +static void ClosePipeToProgram(CopyState cstate); static CopyState BeginCopyTo(Relation rel, Node *query, const char *queryString, - const char *filename, List *attnamelist, List *options); + const char *filename, bool is_program, List *attnamelist, + List *options); static void EndCopyTo(CopyState cstate); static uint64 DoCopyTo(CopyState cstate); static uint64 CopyTo(CopyState cstate); @@ -482,9 +485,35 @@ CopySendEndOfRow(CopyState cstate) if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1, cstate->copy_file) != 1 || ferror(cstate->copy_file)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to COPY file: %m"))); + { + if (cstate->is_program) + { + if (errno == EPIPE) + { + /* + * The pipe will be closed automatically on error at + * the end of transaction, but we might get a better + * error message from the subprocess' exit code than + * just "Broken Pipe" + */ + ClosePipeToProgram(cstate); + + /* + * If ClosePipeToProgram() didn't throw an error, + * the program terminated normally, but closed the + * pipe first. Restore errno, and throw an error. + */ + errno = EPIPE; + } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to COPY program: %m"))); + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to COPY file: %m"))); + } break; case COPY_OLD_FE: /* The FE/BE protocol uses \n as newline for all platforms */ @@ -752,13 +781,22 @@ DoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) Relation rel; Oid relid; - /* Disallow file COPY except to superusers. */ + /* Disallow COPY to/from file or program except to superusers. */ if (!pipe && !superuser()) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be superuser to COPY to or from a file"), - errhint("Anyone can COPY to stdout or from stdin. " - "psql's \\copy command also works for anyone."))); + { + if (stmt->is_program) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from an external program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + else + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } if (stmt->relation) { @@ -812,14 +850,15 @@ DoCopy(const CopyStmt *stmt, const char *queryString, uint64 *processed) if (XactReadOnly && !rel->rd_islocaltemp) PreventCommandIfReadOnly("COPY FROM"); - cstate = BeginCopyFrom(rel, stmt->filename, + cstate = BeginCopyFrom(rel, stmt->filename, stmt->is_program, stmt->attlist, stmt->options); *processed = CopyFrom(cstate); /* copy from file to database */ EndCopyFrom(cstate); } else { - cstate = BeginCopyTo(rel, stmt->query, queryString, stmt->filename, + cstate = BeginCopyTo(rel, stmt->query, queryString, + stmt->filename, stmt->is_program, stmt->attlist, stmt->options); *processed = DoCopyTo(cstate); /* copy from database to file */ EndCopyTo(cstate); @@ -1390,16 +1429,44 @@ BeginCopy(bool is_from, } /* + * Closes the pipe to an external program, checking the pclose() return code. + */ +static void +ClosePipeToProgram(CopyState cstate) +{ + int pclose_rc; + + Assert(cstate->is_program); + + pclose_rc = ClosePipeStream(cstate->copy_file); + if (pclose_rc == -1) + ereport(ERROR, + (errmsg("could not close pipe to external command: %m"))); + else if (pclose_rc != 0) + ereport(ERROR, + (errmsg("program \"%s\" failed", + cstate->filename), + errdetail_internal("%s", wait_result_to_str(pclose_rc)))); +} + +/* * Release resources allocated in a cstate for COPY TO/FROM. */ static void EndCopy(CopyState cstate) { - if (cstate->filename != NULL && FreeFile(cstate->copy_file)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", - cstate->filename))); + if (cstate->is_program) + { + ClosePipeToProgram(cstate); + } + else + { + if (cstate->filename != NULL && FreeFile(cstate->copy_file)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + cstate->filename))); + } MemoryContextDelete(cstate->copycontext); pfree(cstate); @@ -1413,6 +1480,7 @@ BeginCopyTo(Relation rel, Node *query, const char *queryString, const char *filename, + bool is_program, List *attnamelist, List *options) { @@ -1451,39 +1519,52 @@ BeginCopyTo(Relation rel, if (pipe) { + Assert(!is_program); /* the grammar does not allow this */ if (whereToSendOutput != DestRemote) cstate->copy_file = stdout; } else { - mode_t oumask; /* Pre-existing umask value */ - struct stat st; + cstate->filename = pstrdup(filename); + cstate->is_program = is_program; - /* - * Prevent write to relative path ... too easy to shoot oneself in the - * foot by overwriting a database file ... - */ - if (!is_absolute_path(filename)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_NAME), - errmsg("relative path not allowed for COPY to file"))); + if (is_program) + { + cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errmsg("could not execute command \"%s\": %m", + cstate->filename))); + } + else + { + mode_t oumask; /* Pre-existing umask value */ + struct stat st; - cstate->filename = pstrdup(filename); - oumask = umask(S_IWGRP | S_IWOTH); - cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W); - umask(oumask); + /* + * Prevent write to relative path ... too easy to shoot oneself in + * the foot by overwriting a database file ... + */ + if (!is_absolute_path(filename)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("relative path not allowed for COPY to file"))); - if (cstate->copy_file == NULL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - cstate->filename))); + oumask = umask(S_IWGRP | S_IWOTH); + cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W); + umask(oumask); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + cstate->filename))); - fstat(fileno(cstate->copy_file), &st); - if (S_ISDIR(st.st_mode)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is a directory", cstate->filename))); + fstat(fileno(cstate->copy_file), &st); + if (S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a directory", cstate->filename))); + } } MemoryContextSwitchTo(oldcontext); @@ -2317,6 +2398,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, CopyState BeginCopyFrom(Relation rel, const char *filename, + bool is_program, List *attnamelist, List *options) { @@ -2413,9 +2495,11 @@ BeginCopyFrom(Relation rel, cstate->defexprs = defexprs; cstate->volatile_defexprs = volatile_defexprs; cstate->num_defaults = num_defaults; + cstate->is_program = is_program; if (pipe) { + Assert(!is_program); /* the grammar does not allow this */ if (whereToSendOutput == DestRemote) ReceiveCopyBegin(cstate); else @@ -2423,22 +2507,33 @@ BeginCopyFrom(Relation rel, } else { - struct stat st; - cstate->filename = pstrdup(filename); - cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R); - if (cstate->copy_file == NULL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\" for reading: %m", - cstate->filename))); + if (cstate->is_program) + { + cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errmsg("could not execute command \"%s\": %m", + cstate->filename))); + } + else + { + struct stat st; - fstat(fileno(cstate->copy_file), &st); - if (S_ISDIR(st.st_mode)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is a directory", cstate->filename))); + cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R); + if (cstate->copy_file == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", + cstate->filename))); + + fstat(fileno(cstate->copy_file), &st); + if (S_ISDIR(st.st_mode)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a directory", cstate->filename))); + } } if (!cstate->binary) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 2da08d1cc15..23ec88d54c7 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2703,6 +2703,7 @@ _copyCopyStmt(const CopyStmt *from) COPY_NODE_FIELD(query); COPY_NODE_FIELD(attlist); COPY_SCALAR_FIELD(is_from); + COPY_SCALAR_FIELD(is_program); COPY_STRING_FIELD(filename); COPY_NODE_FIELD(options); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 9e313c8b1be..99c034ab684 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1090,6 +1090,7 @@ _equalCopyStmt(const CopyStmt *a, const CopyStmt *b) COMPARE_NODE_FIELD(query); COMPARE_NODE_FIELD(attlist); COMPARE_SCALAR_FIELD(is_from); + COMPARE_SCALAR_FIELD(is_program); COMPARE_STRING_FIELD(filename); COMPARE_NODE_FIELD(options); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index b998431f5f3..d3009b67b41 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -381,7 +381,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <boolean> opt_freeze opt_default opt_recheck %type <defelt> opt_binary opt_oids copy_delimiter -%type <boolean> copy_from +%type <boolean> copy_from opt_program %type <ival> opt_column event cursor_options opt_hold opt_set_data %type <objtype> reindex_type drop_type comment_type security_label_type @@ -568,7 +568,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); PARSER PARTIAL PARTITION PASSING PASSWORD PLACING PLANS POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY - PRIOR PRIVILEGES PROCEDURAL PROCEDURE + PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROGRAM QUOTE @@ -2309,7 +2309,10 @@ ClosePortalStmt: * * QUERY : * COPY relname [(columnList)] FROM/TO file [WITH] [(options)] - * COPY ( SELECT ... ) TO file [WITH] [(options)] + * COPY ( SELECT ... ) TO file [WITH] [(options)] + * + * where 'file' can be one of: + * { PROGRAM 'command' | STDIN | STDOUT | 'filename' } * * In the preferred syntax the options are comma-separated * and use generic identifiers instead of keywords. The pre-9.0 @@ -2324,14 +2327,21 @@ ClosePortalStmt: *****************************************************************************/ CopyStmt: COPY opt_binary qualified_name opt_column_list opt_oids - copy_from copy_file_name copy_delimiter opt_with copy_options + copy_from opt_program copy_file_name copy_delimiter opt_with copy_options { CopyStmt *n = makeNode(CopyStmt); n->relation = $3; n->query = NULL; n->attlist = $4; n->is_from = $6; - n->filename = $7; + n->is_program = $7; + n->filename = $8; + + if (n->is_program && n->filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("STDIN/STDOUT not allowed with PROGRAM"), + parser_errposition(@8))); n->options = NIL; /* Concatenate user-supplied flags */ @@ -2339,21 +2349,29 @@ CopyStmt: COPY opt_binary qualified_name opt_column_list opt_oids n->options = lappend(n->options, $2); if ($5) n->options = lappend(n->options, $5); - if ($8) - n->options = lappend(n->options, $8); - if ($10) - n->options = list_concat(n->options, $10); + if ($9) + n->options = lappend(n->options, $9); + if ($11) + n->options = list_concat(n->options, $11); $$ = (Node *)n; } - | COPY select_with_parens TO copy_file_name opt_with copy_options + | COPY select_with_parens TO opt_program copy_file_name opt_with copy_options { CopyStmt *n = makeNode(CopyStmt); n->relation = NULL; n->query = $2; n->attlist = NIL; n->is_from = false; - n->filename = $4; - n->options = $6; + n->is_program = $4; + n->filename = $5; + n->options = $7; + + if (n->is_program && n->filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("STDIN/STDOUT not allowed with PROGRAM"), + parser_errposition(@5))); + $$ = (Node *)n; } ; @@ -2363,6 +2381,11 @@ copy_from: | TO { $$ = FALSE; } ; +opt_program: + PROGRAM { $$ = TRUE; } + | /* EMPTY */ { $$ = FALSE; } + ; + /* * copy_file_name NULL indicates stdio is used. Whether stdin or stdout is * used depends on the direction. (It really doesn't make sense to copy from @@ -12666,6 +12689,7 @@ unreserved_keyword: | PRIVILEGES | PROCEDURAL | PROCEDURE + | PROGRAM | QUOTE | RANGE | READ diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index ba1b84eadef..c31a523857d 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -39,13 +39,13 @@ * for a long time, like relation files. It is the caller's responsibility * to close them, there is no automatic mechanism in fd.c for that. * - * AllocateFile, AllocateDir and OpenTransientFile are wrappers around - * fopen(3), opendir(3), and open(2), respectively. They behave like the - * corresponding native functions, except that the handle is registered with - * the current subtransaction, and will be automatically closed at abort. - * These are intended for short operations like reading a configuration file, - * and there is a fixed limit on the number of files that can be opened using - * these functions at any one time. + * AllocateFile, AllocateDir, OpenPipeStream and OpenTransientFile are + * wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively. + * They behave like the corresponding native functions, except that the handle + * is registered with the current subtransaction, and will be automatically + * closed at abort. These are intended for short operations like reading a + * configuration file, and there is a fixed limit on the number of files that + * can be opened using these functions at any one time. * * Finally, BasicOpenFile is just a thin wrapper around open() that can * release file descriptors in use by the virtual file descriptors if @@ -202,6 +202,7 @@ static uint64 temporary_files_size = 0; typedef enum { AllocateDescFile, + AllocateDescPipe, AllocateDescDir, AllocateDescRawFD } AllocateDescKind; @@ -1586,6 +1587,61 @@ OpenTransientFile(FileName fileName, int fileFlags, int fileMode) } /* + * Routines that want to initiate a pipe stream should use OpenPipeStream + * rather than plain popen(). This lets fd.c deal with freeing FDs if + * necessary. When done, call ClosePipeStream rather than pclose. + */ +FILE * +OpenPipeStream(const char *command, const char *mode) +{ + FILE *file; + + DO_DB(elog(LOG, "OpenPipeStream: Allocated %d (%s)", + numAllocatedDescs, command)); + + /* + * The test against MAX_ALLOCATED_DESCS prevents us from overflowing + * allocatedFiles[]; the test against max_safe_fds prevents AllocateFile + * from hogging every one of the available FDs, which'd lead to infinite + * looping. + */ + if (numAllocatedDescs >= MAX_ALLOCATED_DESCS || + numAllocatedDescs >= max_safe_fds - 1) + elog(ERROR, "exceeded MAX_ALLOCATED_DESCS while trying to execute command \"%s\"", + command); + +TryAgain: + fflush(stdout); + fflush(stderr); + errno = 0; + if ((file = popen(command, mode)) != NULL) + { + AllocateDesc *desc = &allocatedDescs[numAllocatedDescs]; + + desc->kind = AllocateDescPipe; + desc->desc.file = file; + desc->create_subid = GetCurrentSubTransactionId(); + numAllocatedDescs++; + return desc->desc.file; + } + + if (errno == EMFILE || errno == ENFILE) + { + int save_errno = errno; + + ereport(LOG, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("out of file descriptors: %m; release and retry"))); + errno = 0; + if (ReleaseLruFile()) + goto TryAgain; + errno = save_errno; + } + + return NULL; +} + +/* * Free an AllocateDesc of any type. * * The argument *must* point into the allocatedDescs[] array. @@ -1601,6 +1657,9 @@ FreeDesc(AllocateDesc *desc) case AllocateDescFile: result = fclose(desc->desc.file); break; + case AllocateDescPipe: + result = pclose(desc->desc.file); + break; case AllocateDescDir: result = closedir(desc->desc.dir); break; @@ -1815,6 +1874,31 @@ FreeDir(DIR *dir) /* + * Close a pipe stream returned by OpenPipeStream. + */ +int +ClosePipeStream(FILE *file) +{ + int i; + + DO_DB(elog(LOG, "ClosePipeStream: Allocated %d", numAllocatedDescs)); + + /* Remove file from list of allocated files, if it's present */ + for (i = numAllocatedDescs; --i >= 0;) + { + AllocateDesc *desc = &allocatedDescs[i]; + + if (desc->kind == AllocateDescPipe && desc->desc.file == file) + return FreeDesc(desc); + } + + /* Only get here if someone passes us a file not in allocatedDescs */ + elog(WARNING, "file passed to ClosePipeStream was not obtained from OpenPipeStream"); + + return pclose(file); +} + +/* * closeAllVfds * * Force all VFDs into the physically-closed state, so that the fewest |