aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2016-09-29 13:32:27 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2016-09-29 13:32:34 -0400
commit8e91e12bc3af85ba2287866669268f6825d2cc03 (patch)
treea44bfbe353b0ceb92a4170c6118db5ec40b86e10
parent728a3e73e96173bb0b0b2afc0da061991f9e3d5d (diff)
downloadpostgresql-8e91e12bc3af85ba2287866669268f6825d2cc03.tar.gz
postgresql-8e91e12bc3af85ba2287866669268f6825d2cc03.zip
Allow contrib/file_fdw to read from a program, like COPY FROM PROGRAM.
This patch just exposes COPY's FROM PROGRAM option in contrib/file_fdw. There don't seem to be any security issues with that that are any worse than what already exist with file_fdw and COPY; as in the existing cases, only superusers are allowed to control what gets executed. A regression test case might be nice here, but choosing a 100% portable command to run is hard. (We haven't got a test for COPY FROM PROGRAM itself, either.) Corey Huinker and Adam Gomaa, reviewed by Amit Langote Discussion: <CADkLM=dGDGmaEiZ=UDepzumWg-CVn7r8MHPjr2NArj8S3TsROQ@mail.gmail.com>
-rw-r--r--contrib/file_fdw/file_fdw.c131
-rw-r--r--contrib/file_fdw/output/file_fdw.source6
-rw-r--r--doc/src/sgml/file-fdw.sgml66
3 files changed, 137 insertions, 66 deletions
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index b4719913183..d325f534676 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* file_fdw.c
- * foreign-data wrapper for server-side flat files.
+ * foreign-data wrapper for server-side flat files (or programs).
*
* Copyright (c) 2010-2016, PostgreSQL Global Development Group
*
@@ -57,8 +57,9 @@ struct FileFdwOption
* fileGetOptions(), which currently doesn't bother to look at user mappings.
*/
static const struct FileFdwOption valid_options[] = {
- /* File options */
+ /* Data source options */
{"filename", ForeignTableRelationId},
+ {"program", ForeignTableRelationId},
/* Format options */
/* oids option is not supported */
@@ -85,10 +86,12 @@ static const struct FileFdwOption valid_options[] = {
*/
typedef struct FileFdwPlanState
{
- char *filename; /* file to read */
- List *options; /* merged COPY options, excluding filename */
+ char *filename; /* file or program to read from */
+ bool is_program; /* true if filename represents an OS command */
+ List *options; /* merged COPY options, excluding filename and
+ * is_program */
BlockNumber pages; /* estimate of file's physical size */
- double ntuples; /* estimate of number of rows in file */
+ double ntuples; /* estimate of number of data rows */
} FileFdwPlanState;
/*
@@ -96,9 +99,11 @@ typedef struct FileFdwPlanState
*/
typedef struct FileFdwExecutionState
{
- char *filename; /* file to read */
- List *options; /* merged COPY options, excluding filename */
- CopyState cstate; /* state of reading file */
+ char *filename; /* file or program to read from */
+ bool is_program; /* true if filename represents an OS command */
+ List *options; /* merged COPY options, excluding filename and
+ * is_program */
+ CopyState cstate; /* COPY execution state */
} FileFdwExecutionState;
/*
@@ -139,7 +144,9 @@ static bool fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel,
*/
static bool is_valid_option(const char *option, Oid context);
static void fileGetOptions(Oid foreigntableid,
- char **filename, List **other_options);
+ char **filename,
+ bool *is_program,
+ List **other_options);
static List *get_file_fdw_attribute_options(Oid relid);
static bool check_selective_binary_conversion(RelOptInfo *baserel,
Oid foreigntableid,
@@ -196,16 +203,16 @@ file_fdw_validator(PG_FUNCTION_ARGS)
/*
* Only superusers are allowed to set options of a file_fdw foreign table.
- * This is because the filename is one of those options, and we don't want
- * non-superusers to be able to determine which file gets read.
+ * This is because we don't want non-superusers to be able to control
+ * which file gets read or which program gets executed.
*
* Putting this sort of permissions check in a validator is a bit of a
* crock, but there doesn't seem to be any other place that can enforce
* the check more cleanly.
*
- * Note that the valid_options[] array disallows setting filename at any
- * options level other than foreign table --- otherwise there'd still be a
- * security hole.
+ * Note that the valid_options[] array disallows setting filename and
+ * program at any options level other than foreign table --- otherwise
+ * there'd still be a security hole.
*/
if (catalog == ForeignTableRelationId && !superuser())
ereport(ERROR,
@@ -247,11 +254,11 @@ file_fdw_validator(PG_FUNCTION_ARGS)
}
/*
- * Separate out filename and column-specific options, since
+ * Separate out filename, program, and column-specific options, since
* ProcessCopyOptions won't accept them.
*/
-
- if (strcmp(def->defname, "filename") == 0)
+ if (strcmp(def->defname, "filename") == 0 ||
+ strcmp(def->defname, "program") == 0)
{
if (filename)
ereport(ERROR,
@@ -296,12 +303,13 @@ file_fdw_validator(PG_FUNCTION_ARGS)
ProcessCopyOptions(NULL, NULL, true, other_options);
/*
- * Filename option is required for file_fdw foreign tables.
+ * Either filename or program option is required for file_fdw foreign
+ * tables.
*/
if (catalog == ForeignTableRelationId && filename == NULL)
ereport(ERROR,
(errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
- errmsg("filename is required for file_fdw foreign tables")));
+ errmsg("either filename or program is required for file_fdw foreign tables")));
PG_RETURN_VOID();
}
@@ -326,12 +334,12 @@ is_valid_option(const char *option, Oid context)
/*
* Fetch the options for a file_fdw foreign table.
*
- * We have to separate out "filename" from the other options because
- * it must not appear in the options list passed to the core COPY code.
+ * We have to separate out filename/program from the other options because
+ * those must not appear in the options list passed to the core COPY code.
*/
static void
fileGetOptions(Oid foreigntableid,
- char **filename, List **other_options)
+ char **filename, bool *is_program, List **other_options)
{
ForeignTable *table;
ForeignServer *server;
@@ -359,9 +367,11 @@ fileGetOptions(Oid foreigntableid,
options = list_concat(options, get_file_fdw_attribute_options(foreigntableid));
/*
- * Separate out the filename.
+ * Separate out the filename or program option (we assume there is only
+ * one).
*/
*filename = NULL;
+ *is_program = false;
prev = NULL;
foreach(lc, options)
{
@@ -373,15 +383,22 @@ fileGetOptions(Oid foreigntableid,
options = list_delete_cell(options, lc, prev);
break;
}
+ else if (strcmp(def->defname, "program") == 0)
+ {
+ *filename = defGetString(def);
+ *is_program = true;
+ options = list_delete_cell(options, lc, prev);
+ break;
+ }
prev = lc;
}
/*
- * The validator should have checked that a filename was included in the
- * options, but check again, just in case.
+ * The validator should have checked that filename or program was included
+ * in the options, but check again, just in case.
*/
if (*filename == NULL)
- elog(ERROR, "filename is required for file_fdw foreign tables");
+ elog(ERROR, "either filename or program is required for file_fdw foreign tables");
*other_options = options;
}
@@ -475,12 +492,15 @@ fileGetForeignRelSize(PlannerInfo *root,
FileFdwPlanState *fdw_private;
/*
- * Fetch options. We only need filename at this point, but we might as
- * well get everything and not need to re-fetch it later in planning.
+ * Fetch options. We only need filename (or program) at this point, but
+ * we might as well get everything and not need to re-fetch it later in
+ * planning.
*/
fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState));
fileGetOptions(foreigntableid,
- &fdw_private->filename, &fdw_private->options);
+ &fdw_private->filename,
+ &fdw_private->is_program,
+ &fdw_private->options);
baserel->fdw_private = (void *) fdw_private;
/* Estimate relation size */
@@ -583,20 +603,25 @@ static void
fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
{
char *filename;
+ bool is_program;
List *options;
- /* Fetch options --- we only need filename at this point */
+ /* Fetch options --- we only need filename and is_program at this point */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
- &filename, &options);
+ &filename, &is_program, &options);
- ExplainPropertyText("Foreign File", filename, es);
+ if (is_program)
+ ExplainPropertyText("Foreign Program", filename, es);
+ else
+ ExplainPropertyText("Foreign File", filename, es);
/* Suppress file size if we're not showing cost details */
if (es->costs)
{
struct stat stat_buf;
- if (stat(filename, &stat_buf) == 0)
+ if (!is_program &&
+ stat(filename, &stat_buf) == 0)
ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size,
es);
}
@@ -611,6 +636,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
{
ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
char *filename;
+ bool is_program;
List *options;
CopyState cstate;
FileFdwExecutionState *festate;
@@ -623,7 +649,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
/* Fetch options of foreign table */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
- &filename, &options);
+ &filename, &is_program, &options);
/* Add any options from the plan (currently only convert_selectively) */
options = list_concat(options, plan->fdw_private);
@@ -635,7 +661,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
filename,
- false,
+ is_program,
NIL,
options);
@@ -645,6 +671,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
*/
festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
festate->filename = filename;
+ festate->is_program = is_program;
festate->options = options;
festate->cstate = cstate;
@@ -709,7 +736,7 @@ fileReScanForeignScan(ForeignScanState *node)
festate->cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
festate->filename,
- false,
+ festate->is_program,
NIL,
festate->options);
}
@@ -738,11 +765,22 @@ fileAnalyzeForeignTable(Relation relation,
BlockNumber *totalpages)
{
char *filename;
+ bool is_program;
List *options;
struct stat stat_buf;
/* Fetch options of foreign table */
- fileGetOptions(RelationGetRelid(relation), &filename, &options);
+ fileGetOptions(RelationGetRelid(relation), &filename, &is_program, &options);
+
+ /*
+ * If this is a program instead of a file, just return false to skip
+ * analyzing the table. We could run the program and collect stats on
+ * whatever it currently returns, but it seems likely that in such cases
+ * the output would be too volatile for the stats to be useful. Maybe
+ * there should be an option to enable doing this?
+ */
+ if (is_program)
+ return false;
/*
* Get size of the file. (XXX if we fail here, would it be better to just
@@ -769,8 +807,8 @@ fileAnalyzeForeignTable(Relation relation,
/*
* fileIsForeignScanParallelSafe
- * Reading a file in a parallel worker should work just the same as
- * reading it in the leader, so mark scans safe.
+ * Reading a file, or external program, in a parallel worker should work
+ * just the same as reading it in the leader, so mark scans safe.
*/
static bool
fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel,
@@ -916,9 +954,10 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
/*
* Get size of the file. It might not be there at plan time, though, in
- * which case we have to use a default estimate.
+ * which case we have to use a default estimate. We also have to fall
+ * back to the default if using a program as the input.
*/
- if (stat(fdw_private->filename, &stat_buf) < 0)
+ if (fdw_private->is_program || stat(fdw_private->filename, &stat_buf) < 0)
stat_buf.st_size = 10 * BLCKSZ;
/*
@@ -1000,6 +1039,11 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
* that I/O costs are equivalent to a regular table file of the same size.
* However, we take per-tuple CPU costs as 10x of a seqscan, to account
* for the cost of parsing records.
+ *
+ * In the case of a program source, this calculation is even more divorced
+ * from reality, but we have no good alternative; and it's not clear that
+ * the numbers we produce here matter much anyway, since there's only one
+ * access path for the rel.
*/
run_cost += seq_page_cost * pages;
@@ -1036,6 +1080,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
bool *nulls;
bool found;
char *filename;
+ bool is_program;
List *options;
CopyState cstate;
ErrorContextCallback errcallback;
@@ -1050,12 +1095,12 @@ file_acquire_sample_rows(Relation onerel, int elevel,
nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
/* Fetch options of foreign table */
- fileGetOptions(RelationGetRelid(onerel), &filename, &options);
+ fileGetOptions(RelationGetRelid(onerel), &filename, &is_program, &options);
/*
* Create CopyState from FDW options.
*/
- cstate = BeginCopyFrom(NULL, onerel, filename, false, NIL, options);
+ cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NIL, options);
/*
* Use per-tuple memory context to prevent leak of memory used to read
diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source
index 6fa54409b95..01e2690a825 100644
--- a/contrib/file_fdw/output/file_fdw.source
+++ b/contrib/file_fdw/output/file_fdw.source
@@ -76,7 +76,7 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null '
'); -- ERROR
ERROR: COPY null representation cannot use newline or carriage return
CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR
-ERROR: filename is required for file_fdw foreign tables
+ERROR: either filename or program is required for file_fdw foreign tables
CREATE FOREIGN TABLE agg_text (
a int2 CHECK (a >= 0),
b float4
@@ -132,7 +132,7 @@ ERROR: invalid option "force_not_null"
HINT: There are no valid options in this context.
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR
ERROR: invalid option "force_not_null"
-HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding
+HINT: Valid options in this context are: filename, program, format, header, delimiter, quote, escape, null, encoding
-- force_null is not allowed to be specified at any foreign object level:
ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_null '*'); -- ERROR
ERROR: invalid option "force_null"
@@ -145,7 +145,7 @@ ERROR: invalid option "force_null"
HINT: There are no valid options in this context.
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_null '*'); -- ERROR
ERROR: invalid option "force_null"
-HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding
+HINT: Valid options in this context are: filename, program, format, header, delimiter, quote, escape, null, encoding
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
a | b
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index d3b39aa120b..309a303e031 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -10,10 +10,11 @@
<para>
The <filename>file_fdw</> module provides the foreign-data wrapper
<function>file_fdw</function>, which can be used to access data
- files in the server's file system. Data files must be in a format
+ files in the server's file system, or to execute programs on the server
+ and read their output. The data file or program output must be in a format
that can be read by <command>COPY FROM</command>;
see <xref linkend="sql-copy"> for details.
- Access to such data files is currently read-only.
+ Access to data files is currently read-only.
</para>
<para>
@@ -27,7 +28,22 @@
<listitem>
<para>
- Specifies the file to be read. Required. Must be an absolute path name.
+ Specifies the file to be read. Must be an absolute path name.
+ Either <literal>filename</literal> or <literal>program</literal> must be
+ specified, but not both.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>program</literal></term>
+
+ <listitem>
+ <para>
+ Specifies the command to be executed. The standard output of this
+ command will be read as though <command>COPY FROM PROGRAM</> were used.
+ Either <literal>program</literal> or <literal>filename</literal> must be
+ specified, but not both.
</para>
</listitem>
</varlistentry>
@@ -37,7 +53,7 @@
<listitem>
<para>
- Specifies the file's format,
+ Specifies the data format,
the same as <command>COPY</>'s <literal>FORMAT</literal> option.
</para>
</listitem>
@@ -48,7 +64,7 @@
<listitem>
<para>
- Specifies whether the file has a header line,
+ Specifies whether the data has a header line,
the same as <command>COPY</>'s <literal>HEADER</literal> option.
</para>
</listitem>
@@ -59,7 +75,7 @@
<listitem>
<para>
- Specifies the file's delimiter character,
+ Specifies the data delimiter character,
the same as <command>COPY</>'s <literal>DELIMITER</literal> option.
</para>
</listitem>
@@ -70,7 +86,7 @@
<listitem>
<para>
- Specifies the file's quote character,
+ Specifies the data quote character,
the same as <command>COPY</>'s <literal>QUOTE</literal> option.
</para>
</listitem>
@@ -81,7 +97,7 @@
<listitem>
<para>
- Specifies the file's escape character,
+ Specifies the data escape character,
the same as <command>COPY</>'s <literal>ESCAPE</literal> option.
</para>
</listitem>
@@ -92,7 +108,7 @@
<listitem>
<para>
- Specifies the file's null string,
+ Specifies the data null string,
the same as <command>COPY</>'s <literal>NULL</literal> option.
</para>
</listitem>
@@ -103,7 +119,7 @@
<listitem>
<para>
- Specifies the file's encoding,
+ Specifies the data encoding,
the same as <command>COPY</>'s <literal>ENCODING</literal> option.
</para>
</listitem>
@@ -112,11 +128,11 @@
</variablelist>
<para>
- Note that while <command>COPY</> allows options such as OIDS and HEADER
- to be specified without a corresponding value, the foreign data wrapper
+ Note that while <command>COPY</> allows options such as <literal>HEADER</>
+ to be specified without a corresponding value, the foreign table option
syntax requires a value to be present in all cases. To activate
- <command>COPY</> options normally supplied without a value, you can
- instead pass the value TRUE.
+ <command>COPY</> options typically written without a value, you can pass
+ the value TRUE, since all such options are Booleans.
</para>
<para>
@@ -133,7 +149,7 @@
<para>
This is a Boolean option. If true, it specifies that values of the
column should not be matched against the null string (that is, the
- file-level <literal>null</literal> option). This has the same effect
+ table-level <literal>null</literal> option). This has the same effect
as listing the column in <command>COPY</>'s
<literal>FORCE_NOT_NULL</literal> option.
</para>
@@ -171,14 +187,24 @@
<para>
Changing table-level options requires superuser privileges, for security
- reasons: only a superuser should be able to determine which file is read.
- In principle non-superusers could be allowed to change the other options,
- but that's not supported at present.
+ reasons: only a superuser should be able to control which file is read
+ or which program is run. In principle non-superusers could be allowed to
+ change the other options, but that's not supported at present.
+ </para>
+
+ <para>
+ When specifying the <literal>program</> option, keep in mind that the option
+ string is executed by the shell. If you need to pass any arguments to the
+ command that come from an untrusted source, you must be careful to strip or
+ escape any characters that might have special meaning to the shell.
+ For security reasons, it is best to use a fixed command string, or at least
+ avoid passing any user input in it.
</para>
<para>
For a foreign table using <literal>file_fdw</>, <command>EXPLAIN</> shows
- the name of the file to be read. Unless <literal>COSTS OFF</> is
+ the name of the file to be read or program to be run.
+ For a file, unless <literal>COSTS OFF</> is
specified, the file size (in bytes) is shown as well.
</para>
@@ -186,7 +212,7 @@
<title id="csvlog-fdw">Create a Foreign Table for PostgreSQL CSV Logs</title>
<para>
- One of the obvious uses for the <literal>file_fdw</> is to make
+ One of the obvious uses for <literal>file_fdw</> is to make
the PostgreSQL activity log available as a table for querying. To
do this, first you must be logging to a CSV file, which here we
will call <literal>pglog.csv</>. First, install <literal>file_fdw</>