aboutsummaryrefslogtreecommitdiff
path: root/contrib/postgres_fdw/postgres_fdw.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2013-03-21 19:43:59 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2013-03-21 19:44:32 -0400
commit9cbc4b80ddc10b36c64514104caa69747c386dcf (patch)
tree56ff643469573d7f83c9bbc89ab44b797ab988cd /contrib/postgres_fdw/postgres_fdw.c
parent08af1a0a2ae8fbbd9949d9fa0f7583f6c09cf4ec (diff)
downloadpostgresql-9cbc4b80ddc10b36c64514104caa69747c386dcf.tar.gz
postgresql-9cbc4b80ddc10b36c64514104caa69747c386dcf.zip
Redo postgres_fdw's planner code so it can handle parameterized paths.
I wasn't going to ship this without having at least some example of how to do that. This version isn't terribly bright; in particular it won't consider any combinations of multiple join clauses. Given the cost of executing a remote EXPLAIN, I'm not sure we want to be very aggressive about doing that, anyway. In support of this, refactor generate_implied_equalities_for_indexcol so that it can be used to extract equivalence clauses that aren't necessarily tied to an index.
Diffstat (limited to 'contrib/postgres_fdw/postgres_fdw.c')
-rw-r--r--contrib/postgres_fdw/postgres_fdw.c941
1 files changed, 591 insertions, 350 deletions
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 58dd12c575a..982a8d9a61f 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -23,10 +23,13 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
+#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
@@ -49,28 +52,41 @@ PG_MODULE_MAGIC;
*/
typedef struct PgFdwRelationInfo
{
- /* XXX underdocumented, but a lot of this shouldn't be here anyway */
- StringInfoData sql;
- Cost startup_cost;
- Cost total_cost;
+ /* baserestrictinfo clauses, broken down into safe and unsafe subsets. */
List *remote_conds;
- List *param_conds;
List *local_conds;
- List *param_numbers;
+
+ /* Bitmap of attr numbers we need to fetch from the remote server. */
+ Bitmapset *attrs_used;
+
+ /* Cost and selectivity of local_conds. */
+ QualCost local_conds_cost;
+ Selectivity local_conds_sel;
+
+ /* Estimated size and cost for a scan with baserestrictinfo quals. */
+ double rows;
+ int width;
+ Cost startup_cost;
+ Cost total_cost;
+
+ /* Options extracted from catalogs. */
+ bool use_remote_estimate;
+ Cost fdw_startup_cost;
+ Cost fdw_tuple_cost;
/* Cached catalog information. */
ForeignTable *table;
ForeignServer *server;
+ UserMapping *user; /* only set in use_remote_estimate mode */
} PgFdwRelationInfo;
/*
* Indexes of FDW-private information stored in fdw_private lists.
*
* We store various information in ForeignScan.fdw_private to pass it from
- * planner to executor. Specifically there is:
+ * planner to executor. Currently there is just:
*
* 1) SELECT statement text to be sent to the remote server
- * 2) IDs of PARAM_EXEC Params used in the SELECT statement
*
* These items are indexed with the enum FdwScanPrivateIndex, so an item
* can be fetched with list_nth(). For example, to get the SELECT statement:
@@ -79,9 +95,7 @@ typedef struct PgFdwRelationInfo
enum FdwScanPrivateIndex
{
/* SQL statement to execute remotely (as a String node) */
- FdwScanPrivateSelectSql,
- /* Integer list of param IDs of PARAM_EXEC Params used in SQL stmt */
- FdwScanPrivateExternParamIds
+ FdwScanPrivateSelectSql
};
/*
@@ -117,10 +131,10 @@ typedef struct PgFdwScanState
PGconn *conn; /* connection for the scan */
unsigned int cursor_number; /* quasi-unique ID for my cursor */
bool cursor_exists; /* have we created the cursor? */
- bool extparams_done; /* have we converted PARAM_EXTERN params? */
int numParams; /* number of parameters passed to query */
- Oid *param_types; /* array of types of query parameters */
- const char **param_values; /* array of values of query parameters */
+ FmgrInfo *param_flinfo; /* output conversion functions for them */
+ List *param_exprs; /* executable expressions for param values */
+ const char **param_values; /* textual values of query parameters */
/* for storing result tuples */
HeapTuple *tuples; /* array of currently-retrieved tuples */
@@ -194,6 +208,13 @@ typedef struct ConversionLocation
AttrNumber cur_attno; /* attribute number being processed, or 0 */
} ConversionLocation;
+/* Callback argument for ec_member_matches_foreign */
+typedef struct
+{
+ Expr *current; /* current expr, or NULL if not yet found */
+ List *already_used; /* expressions already dealt with */
+} ec_member_foreign_arg;
+
/*
* SQL functions
*/
@@ -260,12 +281,20 @@ static bool postgresAnalyzeForeignTable(Relation relation,
/*
* Helper functions
*/
+static void estimate_path_cost_size(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *join_conds,
+ double *p_rows, int *p_width,
+ Cost *p_startup_cost, Cost *p_total_cost);
static void get_remote_estimate(const char *sql,
PGconn *conn,
double *rows,
int *width,
Cost *startup_cost,
Cost *total_cost);
+static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
+ EquivalenceClass *ec, EquivalenceMember *em,
+ void *arg);
static void create_cursor(ForeignScanState *node);
static void fetch_more_data(ForeignScanState *node);
static void close_cursor(PGconn *conn, unsigned int cursor_number);
@@ -330,38 +359,15 @@ postgres_fdw_handler(PG_FUNCTION_ARGS)
* postgresGetForeignRelSize
* Estimate # of rows and width of the result of the scan
*
- * Here we estimate number of rows returned by the scan in two steps. In the
- * first step, we execute remote EXPLAIN command to obtain the number of rows
- * returned from remote side. In the second step, we calculate the selectivity
- * of the filtering done on local side, and modify first estimate.
- *
- * We have to get some catalog objects and generate remote query string here,
- * so we store such expensive information in FDW private area of RelOptInfo and
- * pass them to subsequent functions for reuse.
+ * We should consider the effect of all baserestrictinfo clauses here, but
+ * not any join clauses.
*/
static void
postgresGetForeignRelSize(PlannerInfo *root,
RelOptInfo *baserel,
Oid foreigntableid)
{
- bool use_remote_estimate = false;
PgFdwRelationInfo *fpinfo;
- StringInfo sql;
- ForeignTable *table;
- ForeignServer *server;
- Selectivity sel;
- double rows;
- int width;
- Cost startup_cost;
- Cost total_cost;
- Cost run_cost;
- QualCost qpqual_cost;
- Cost cpu_per_tuple;
- List *remote_conds;
- List *param_conds;
- List *local_conds;
- List *param_numbers;
- Bitmapset *attrs_used;
ListCell *lc;
/*
@@ -369,127 +375,122 @@ postgresGetForeignRelSize(PlannerInfo *root,
* functions.
*/
fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo));
- initStringInfo(&fpinfo->sql);
- sql = &fpinfo->sql;
+ baserel->fdw_private = (void *) fpinfo;
+
+ /* Look up foreign-table catalog info. */
+ fpinfo->table = GetForeignTable(foreigntableid);
+ fpinfo->server = GetForeignServer(fpinfo->table->serverid);
/*
- * Determine whether we use remote estimate or not. Note that per-table
- * setting overrides per-server setting.
+ * Extract user-settable option values. Note that per-table setting of
+ * use_remote_estimate overrides per-server setting.
*/
- table = GetForeignTable(foreigntableid);
- server = GetForeignServer(table->serverid);
- foreach(lc, server->options)
+ fpinfo->use_remote_estimate = false;
+ fpinfo->fdw_startup_cost = DEFAULT_FDW_STARTUP_COST;
+ fpinfo->fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST;
+
+ foreach(lc, fpinfo->server->options)
{
DefElem *def = (DefElem *) lfirst(lc);
if (strcmp(def->defname, "use_remote_estimate") == 0)
- {
- use_remote_estimate = defGetBoolean(def);
- break;
- }
+ fpinfo->use_remote_estimate = defGetBoolean(def);
+ else if (strcmp(def->defname, "fdw_startup_cost") == 0)
+ fpinfo->fdw_startup_cost = strtod(defGetString(def), NULL);
+ else if (strcmp(def->defname, "fdw_tuple_cost") == 0)
+ fpinfo->fdw_tuple_cost = strtod(defGetString(def), NULL);
}
- foreach(lc, table->options)
+ foreach(lc, fpinfo->table->options)
{
DefElem *def = (DefElem *) lfirst(lc);
if (strcmp(def->defname, "use_remote_estimate") == 0)
{
- use_remote_estimate = defGetBoolean(def);
- break;
+ fpinfo->use_remote_estimate = defGetBoolean(def);
+ break; /* only need the one value */
}
}
/*
- * Identify which restriction clauses can be sent to the remote server and
- * which can't. Conditions that are remotely executable but contain
- * PARAM_EXTERN Params have to be treated separately because we can't use
- * placeholders in remote EXPLAIN.
+ * If the table or the server is configured to use remote estimates,
+ * identify which user to do remote access as during planning. This
+ * should match what ExecCheckRTEPerms() does. If we fail due to lack of
+ * permissions, the query would have failed at runtime anyway.
+ */
+ if (fpinfo->use_remote_estimate)
+ {
+ RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
+ Oid userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ fpinfo->user = GetUserMapping(userid, fpinfo->server->serverid);
+ }
+ else
+ fpinfo->user = NULL;
+
+ /*
+ * Identify which baserestrictinfo clauses can be sent to the remote
+ * server and which can't.
*/
- classifyConditions(root, baserel, &remote_conds, &param_conds,
- &local_conds, &param_numbers);
+ classifyConditions(root, baserel,
+ &fpinfo->remote_conds, &fpinfo->local_conds);
/*
* Identify which attributes will need to be retrieved from the remote
- * server. These include all attrs needed for joins or final output, plus
- * all attrs used in the local_conds.
+ * server. These include all attrs needed for joins or final output, plus
+ * all attrs used in the local_conds. (Note: if we end up using a
+ * parameterized scan, it's possible that some of the join clauses will be
+ * sent to the remote and thus we wouldn't really need to retrieve the
+ * columns used in them. Doesn't seem worth detecting that case though.)
*/
- attrs_used = NULL;
+ fpinfo->attrs_used = NULL;
pull_varattnos((Node *) baserel->reltargetlist, baserel->relid,
- &attrs_used);
- foreach(lc, local_conds)
+ &fpinfo->attrs_used);
+ foreach(lc, fpinfo->local_conds)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
pull_varattnos((Node *) rinfo->clause, baserel->relid,
- &attrs_used);
+ &fpinfo->attrs_used);
}
/*
- * Construct remote query which consists of SELECT, FROM, and WHERE
- * clauses. For now, leave out the param_conds.
+ * Compute the selectivity and cost of the local_conds, so we don't have
+ * to do it over again for each path. The best we can do for these
+ * conditions is to estimate selectivity on the basis of local statistics.
*/
- deparseSelectSql(sql, root, baserel, attrs_used);
- if (remote_conds)
- appendWhereClause(sql, root, remote_conds, true);
+ fpinfo->local_conds_sel = clauselist_selectivity(root,
+ fpinfo->local_conds,
+ baserel->relid,
+ JOIN_INNER,
+ NULL);
+
+ cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
/*
* If the table or the server is configured to use remote estimates,
- * connect to the foreign server and execute EXPLAIN with the quals that
- * don't contain any Param nodes. Otherwise, estimate rows using whatever
- * statistics we have locally, in a way similar to ordinary tables.
+ * connect to the foreign server and execute EXPLAIN to estimate the
+ * number of rows selected by the restriction clauses, as well as the
+ * average row width. Otherwise, estimate using whatever statistics we
+ * have locally, in a way similar to ordinary tables.
*/
- if (use_remote_estimate)
+ if (fpinfo->use_remote_estimate)
{
- RangeTblEntry *rte;
- Oid userid;
- UserMapping *user;
- PGconn *conn;
-
/*
- * Identify which user to do the remote access as. This should match
- * what ExecCheckRTEPerms() does. If we fail due to lack of
- * permissions, the query would have failed at runtime anyway.
+ * Get cost/size estimates with help of remote server. Save the
+ * values in fpinfo so we don't need to do it again to generate the
+ * basic foreign path.
*/
- rte = planner_rt_fetch(baserel->relid, root);
- userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+ estimate_path_cost_size(root, baserel, NIL,
+ &fpinfo->rows, &fpinfo->width,
+ &fpinfo->startup_cost, &fpinfo->total_cost);
- user = GetUserMapping(userid, server->serverid);
- conn = GetConnection(server, user, false);
- get_remote_estimate(sql->data, conn, &rows, &width,
- &startup_cost, &total_cost);
- ReleaseConnection(conn);
-
- /*
- * Estimate selectivity of conditions which were not used in remote
- * EXPLAIN by calling clauselist_selectivity(). The best we can do
- * for these conditions is to estimate selectivity on the basis of
- * local statistics.
- */
- sel = clauselist_selectivity(root, param_conds,
- baserel->relid, JOIN_INNER, NULL);
- sel *= clauselist_selectivity(root, local_conds,
- baserel->relid, JOIN_INNER, NULL);
-
- /*
- * Add in the eval cost of those conditions, too.
- */
- cost_qual_eval(&qpqual_cost, param_conds, root);
- startup_cost += qpqual_cost.startup;
- total_cost += qpqual_cost.per_tuple * rows;
- cost_qual_eval(&qpqual_cost, local_conds, root);
- startup_cost += qpqual_cost.startup;
- total_cost += qpqual_cost.per_tuple * rows;
-
- /* Report estimated numbers to planner. */
- baserel->rows = clamp_row_est(rows * sel);
- baserel->width = width;
+ /* Report estimated baserel size to planner. */
+ baserel->rows = fpinfo->rows;
+ baserel->width = fpinfo->width;
}
else
{
/*
- * Estimate rows from the result of the last ANALYZE, using all
- * conditions specified in original query.
- *
* If the foreign table has never been ANALYZEd, it will have relpages
* and reltuples equal to zero, which most likely has nothing to do
* with reality. We can't do a whole lot about that if we're not
@@ -505,83 +506,14 @@ postgresGetForeignRelSize(PlannerInfo *root,
(10 * BLCKSZ) / (baserel->width + sizeof(HeapTupleHeaderData));
}
+ /* Estimate baserel size as best we can with local statistics. */
set_baserel_size_estimates(root, baserel);
- /* Cost as though this were a seqscan, which is pessimistic. */
- startup_cost = 0;
- run_cost = 0;
- run_cost += seq_page_cost * baserel->pages;
-
- startup_cost += baserel->baserestrictcost.startup;
- cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
- run_cost += cpu_per_tuple * baserel->tuples;
-
- total_cost = startup_cost + run_cost;
+ /* Fill in basically-bogus cost estimates for use later. */
+ estimate_path_cost_size(root, baserel, NIL,
+ &fpinfo->rows, &fpinfo->width,
+ &fpinfo->startup_cost, &fpinfo->total_cost);
}
-
- /*
- * Finish deparsing remote query by adding conditions which were unusable
- * in remote EXPLAIN because they contain Param nodes.
- */
- if (param_conds)
- appendWhereClause(sql, root, param_conds, (remote_conds == NIL));
-
- /*
- * Add FOR UPDATE/SHARE if appropriate. We apply locking during the
- * initial row fetch, rather than later on as is done for local tables.
- * The extra roundtrips involved in trying to duplicate the local
- * semantics exactly don't seem worthwhile (see also comments for
- * RowMarkType).
- */
- if (baserel->relid == root->parse->resultRelation &&
- (root->parse->commandType == CMD_UPDATE ||
- root->parse->commandType == CMD_DELETE))
- {
- /* Relation is UPDATE/DELETE target, so use FOR UPDATE */
- appendStringInfo(sql, " FOR UPDATE");
- }
- else
- {
- RowMarkClause *rc = get_parse_rowmark(root->parse, baserel->relid);
-
- if (rc)
- {
- /*
- * Relation is specified as a FOR UPDATE/SHARE target, so handle
- * that.
- *
- * For now, just ignore any [NO] KEY specification, since (a) it's
- * not clear what that means for a remote table that we don't have
- * complete information about, and (b) it wouldn't work anyway on
- * older remote servers. Likewise, we don't worry about NOWAIT.
- */
- switch (rc->strength)
- {
- case LCS_FORKEYSHARE:
- case LCS_FORSHARE:
- appendStringInfo(sql, " FOR SHARE");
- break;
- case LCS_FORNOKEYUPDATE:
- case LCS_FORUPDATE:
- appendStringInfo(sql, " FOR UPDATE");
- break;
- }
- }
- }
-
- /*
- * Store obtained information into FDW-private area of RelOptInfo so it's
- * available to subsequent functions.
- */
- fpinfo->startup_cost = startup_cost;
- fpinfo->total_cost = total_cost;
- fpinfo->remote_conds = remote_conds;
- fpinfo->param_conds = param_conds;
- fpinfo->local_conds = local_conds;
- fpinfo->param_numbers = param_numbers;
- fpinfo->table = table;
- fpinfo->server = server;
- baserel->fdw_private = (void *) fpinfo;
}
/*
@@ -595,77 +527,187 @@ postgresGetForeignPaths(PlannerInfo *root,
{
PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
ForeignPath *path;
- ListCell *lc;
- double fdw_startup_cost = DEFAULT_FDW_STARTUP_COST;
- double fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST;
+ Relids lateral_referencers;
+ List *join_quals;
+ Relids required_outer;
+ double rows;
+ int width;
Cost startup_cost;
Cost total_cost;
- List *fdw_private;
+ ListCell *lc;
/*
- * Check for user override of fdw_startup_cost, fdw_tuple_cost values
+ * Create simplest ForeignScan path node and add it to baserel. This path
+ * corresponds to SeqScan path of regular tables (though depending on what
+ * baserestrict conditions we were able to send to remote, there might
+ * actually be an indexscan happening there). We already did all the work
+ * to estimate cost and size of this path.
*/
- foreach(lc, fpinfo->server->options)
- {
- DefElem *d = (DefElem *) lfirst(lc);
-
- if (strcmp(d->defname, "fdw_startup_cost") == 0)
- fdw_startup_cost = strtod(defGetString(d), NULL);
- else if (strcmp(d->defname, "fdw_tuple_cost") == 0)
- fdw_tuple_cost = strtod(defGetString(d), NULL);
- }
+ path = create_foreignscan_path(root, baserel,
+ fpinfo->rows,
+ fpinfo->startup_cost,
+ fpinfo->total_cost,
+ NIL, /* no pathkeys */
+ NULL, /* no outer rel either */
+ NIL); /* no fdw_private list */
+ add_path(baserel, (Path *) path);
/*
- * We have cost values which are estimated on remote side, so adjust them
- * for better estimate which respect various stuffs to complete the scan,
- * such as sending query, transferring result, and local filtering.
- */
- startup_cost = fpinfo->startup_cost;
- total_cost = fpinfo->total_cost;
-
- /*----------
- * Adjust costs with factors of the corresponding foreign server:
- * - add cost to establish connection to both startup and total
- * - add cost to manipulate on remote, and transfer result to total
- * - add cost to manipulate tuples on local side to total
- *----------
+ * If we're not using remote estimates, stop here. We have no way to
+ * estimate whether any join clauses would be worth sending across, so
+ * don't bother building parameterized paths.
*/
- startup_cost += fdw_startup_cost;
- total_cost += fdw_startup_cost;
- total_cost += fdw_tuple_cost * baserel->rows;
- total_cost += cpu_tuple_cost * baserel->rows;
+ if (!fpinfo->use_remote_estimate)
+ return;
/*
- * Build the fdw_private list that will be available to the executor.
- * Items in the list must match enum FdwScanPrivateIndex, above.
+ * As a crude first hack, we consider each available join clause and try
+ * to make a parameterized path using just that clause. Later we should
+ * consider combinations of clauses, probably.
*/
- fdw_private = list_make2(makeString(fpinfo->sql.data),
- fpinfo->param_numbers);
/*
- * Create simplest ForeignScan path node and add it to baserel. This path
- * corresponds to SeqScan path of regular tables (though depending on what
- * baserestrict conditions we were able to send to remote, there might
- * actually be an indexscan happening there).
+ * If there are any rels that have LATERAL references to this one, we
+ * cannot use join quals referencing them as remote quals for this one,
+ * since such rels would have to be on the inside not the outside of a
+ * nestloop join relative to this one. Create a Relids set listing all
+ * such rels, for use in checks of potential join clauses.
*/
- path = create_foreignscan_path(root, baserel,
- baserel->rows,
- startup_cost,
- total_cost,
- NIL, /* no pathkeys */
- NULL, /* no outer rel either */
- fdw_private);
- add_path(baserel, (Path *) path);
+ lateral_referencers = NULL;
+ foreach(lc, root->lateral_info_list)
+ {
+ LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
+
+ if (bms_is_member(baserel->relid, ljinfo->lateral_lhs))
+ lateral_referencers = bms_add_member(lateral_referencers,
+ ljinfo->lateral_rhs);
+ }
+
+ /* Scan the rel's join clauses */
+ foreach(lc, baserel->joininfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Check if clause can be moved to this rel */
+ if (!join_clause_is_movable_to(rinfo, baserel->relid))
+ continue;
+
+ /* Not useful if it conflicts with any LATERAL references */
+ if (bms_overlap(rinfo->clause_relids, lateral_referencers))
+ continue;
+
+ /* See if it is safe to send to remote */
+ if (!is_foreign_expr(root, baserel, rinfo->clause))
+ continue;
+
+ /*
+ * OK, get a cost estimate from the remote, and make a path.
+ */
+ join_quals = list_make1(rinfo);
+ estimate_path_cost_size(root, baserel, join_quals,
+ &rows, &width,
+ &startup_cost, &total_cost);
+
+ /* Must calculate required outer rels for this path */
+ required_outer = bms_union(rinfo->clause_relids,
+ baserel->lateral_relids);
+ /* We do not want the foreign rel itself listed in required_outer */
+ required_outer = bms_del_member(required_outer, baserel->relid);
+ /* Enforce convention that required_outer is exactly NULL if empty */
+ if (bms_is_empty(required_outer))
+ required_outer = NULL;
+
+ path = create_foreignscan_path(root, baserel,
+ rows,
+ startup_cost,
+ total_cost,
+ NIL, /* no pathkeys */
+ required_outer,
+ NIL); /* no fdw_private list */
+ add_path(baserel, (Path *) path);
+ }
/*
- * XXX We can consider sorted path or parameterized path here if we know
- * that foreign table is indexed on remote end. For this purpose, we
- * might have to support FOREIGN INDEX to represent possible sets of sort
- * keys and/or filtering. Or we could just try some join conditions and
- * see if remote side estimates using them as markedly cheaper. Note that
- * executor functions need work to support internal Params before we can
- * try generating any parameterized paths, though.
+ * The above scan examined only "generic" join clauses, not those that
+ * were absorbed into EquivalenceClauses. See if we can make anything out
+ * of EquivalenceClauses.
*/
+ if (baserel->has_eclass_joins)
+ {
+ /*
+ * We repeatedly scan the eclass list looking for column references
+ * (or expressions) belonging to the foreign rel. Each time we find
+ * one, we generate a list of equivalence joinclauses for it, and then
+ * try to make those into foreign paths. Repeat till there are no
+ * more candidate EC members.
+ */
+ ec_member_foreign_arg arg;
+
+ arg.already_used = NIL;
+ for (;;)
+ {
+ List *clauses;
+
+ /* Make clauses, skipping any that join to lateral_referencers */
+ arg.current = NULL;
+ clauses = generate_implied_equalities_for_column(root,
+ baserel,
+ ec_member_matches_foreign,
+ (void *) &arg,
+ lateral_referencers);
+
+ /* Done if there are no more expressions in the foreign rel */
+ if (arg.current == NULL)
+ {
+ Assert(clauses == NIL);
+ break;
+ }
+
+ /* Scan the extracted join clauses */
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Check if clause can be moved to this rel */
+ if (!join_clause_is_movable_to(rinfo, baserel->relid))
+ continue;
+
+ /* Shouldn't conflict with any LATERAL references */
+ Assert(!bms_overlap(rinfo->clause_relids, lateral_referencers));
+
+ /* See if it is safe to send to remote */
+ if (!is_foreign_expr(root, baserel, rinfo->clause))
+ continue;
+
+ /*
+ * OK, get a cost estimate from the remote, and make a path.
+ */
+ join_quals = list_make1(rinfo);
+ estimate_path_cost_size(root, baserel, join_quals,
+ &rows, &width,
+ &startup_cost, &total_cost);
+
+ /* Must calculate required outer rels for this path */
+ required_outer = bms_union(rinfo->clause_relids,
+ baserel->lateral_relids);
+ required_outer = bms_del_member(required_outer, baserel->relid);
+ if (bms_is_empty(required_outer))
+ required_outer = NULL;
+
+ path = create_foreignscan_path(root, baserel,
+ rows,
+ startup_cost,
+ total_cost,
+ NIL, /* no pathkeys */
+ required_outer,
+ NIL); /* no fdw_private */
+ add_path(baserel, (Path *) path);
+ }
+
+ /* Try again, now ignoring the expression we found this time */
+ arg.already_used = lappend(arg.already_used, arg.current);
+ }
+ }
}
/*
@@ -682,19 +724,32 @@ postgresGetForeignPlan(PlannerInfo *root,
{
PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
Index scan_relid = baserel->relid;
- List *fdw_private = best_path->fdw_private;
- List *remote_exprs = NIL;
+ List *fdw_private;
+ List *remote_conds = NIL;
List *local_exprs = NIL;
+ List *params_list = NIL;
+ StringInfoData sql;
ListCell *lc;
/*
* Separate the scan_clauses into those that can be executed remotely and
- * those that can't. For now, we accept only remote clauses that were
- * previously determined to be safe by classifyClauses (so, only
- * baserestrictinfo clauses can be used that way).
+ * those that can't. baserestrictinfo clauses that were previously
+ * determined to be safe or unsafe by classifyClauses are shown in
+ * fpinfo->remote_conds and fpinfo->local_conds. Anything else in the
+ * scan_clauses list should be a join clause that was found safe by
+ * postgresGetForeignPaths.
+ *
+ * Note: for clauses extracted from EquivalenceClasses, it's possible that
+ * what we get here is a different representation of the clause than what
+ * postgresGetForeignPaths saw; for example we might get a commuted
+ * version of the clause. So we can't insist on simple equality as we do
+ * for the baserestrictinfo clauses.
*
* This code must match "extract_actual_clauses(scan_clauses, false)"
* except for the additional decision about remote versus local execution.
+ * Note however that we only strip the RestrictInfo nodes from the
+ * local_exprs list, since appendWhereClause expects a list of
+ * RestrictInfos.
*/
foreach(lc, scan_clauses)
{
@@ -706,30 +761,88 @@ postgresGetForeignPlan(PlannerInfo *root,
if (rinfo->pseudoconstant)
continue;
- /* Either simple or parameterized remote clauses are OK now */
- if (list_member_ptr(fpinfo->remote_conds, rinfo) ||
- list_member_ptr(fpinfo->param_conds, rinfo))
- remote_exprs = lappend(remote_exprs, rinfo->clause);
- else
+ if (list_member_ptr(fpinfo->remote_conds, rinfo))
+ remote_conds = lappend(remote_conds, rinfo);
+ else if (list_member_ptr(fpinfo->local_conds, rinfo))
local_exprs = lappend(local_exprs, rinfo->clause);
+ else
+ {
+ Assert(is_foreign_expr(root, baserel, rinfo->clause));
+ remote_conds = lappend(remote_conds, rinfo);
+ }
}
/*
+ * Build the query string to be sent for execution, and identify
+ * expressions to be sent as parameters.
+ */
+ initStringInfo(&sql);
+ deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used);
+ if (remote_conds)
+ appendWhereClause(&sql, root, baserel, remote_conds,
+ true, &params_list);
+
+ /*
+ * Add FOR UPDATE/SHARE if appropriate. We apply locking during the
+ * initial row fetch, rather than later on as is done for local tables.
+ * The extra roundtrips involved in trying to duplicate the local
+ * semantics exactly don't seem worthwhile (see also comments for
+ * RowMarkType).
+ */
+ if (baserel->relid == root->parse->resultRelation &&
+ (root->parse->commandType == CMD_UPDATE ||
+ root->parse->commandType == CMD_DELETE))
+ {
+ /* Relation is UPDATE/DELETE target, so use FOR UPDATE */
+ appendStringInfo(&sql, " FOR UPDATE");
+ }
+ else
+ {
+ RowMarkClause *rc = get_parse_rowmark(root->parse, baserel->relid);
+
+ if (rc)
+ {
+ /*
+ * Relation is specified as a FOR UPDATE/SHARE target, so handle
+ * that.
+ *
+ * For now, just ignore any [NO] KEY specification, since (a) it's
+ * not clear what that means for a remote table that we don't have
+ * complete information about, and (b) it wouldn't work anyway on
+ * older remote servers. Likewise, we don't worry about NOWAIT.
+ */
+ switch (rc->strength)
+ {
+ case LCS_FORKEYSHARE:
+ case LCS_FORSHARE:
+ appendStringInfo(&sql, " FOR SHARE");
+ break;
+ case LCS_FORNOKEYUPDATE:
+ case LCS_FORUPDATE:
+ appendStringInfo(&sql, " FOR UPDATE");
+ break;
+ }
+ }
+ }
+
+ /*
+ * Build the fdw_private list that will be available to the executor.
+ * Items in the list must match enum FdwScanPrivateIndex, above.
+ */
+ fdw_private = list_make1(makeString(sql.data));
+
+ /*
* Create the ForeignScan node from target list, local filtering
- * expressions, remote filtering expressions, and FDW private information.
- *
- * Note that the remote_exprs are stored in the fdw_exprs field of the
- * finished plan node; we can't keep them in private state because then
- * they wouldn't be subject to later planner processing.
+ * expressions, remote parameter expressions, and FDW private information.
*
- * XXX Currently, the remote_exprs aren't actually used at runtime, so we
- * don't need to store them at all. But we'll keep this behavior for a
- * little while for debugging reasons.
+ * Note that the remote parameter expressions are stored in the fdw_exprs
+ * field of the finished plan node; we can't keep them in private state
+ * because then they wouldn't be subject to later planner processing.
*/
return make_foreignscan(tlist,
local_exprs,
scan_relid,
- remote_exprs,
+ params_list,
fdw_private);
}
@@ -748,9 +861,9 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags)
ForeignTable *table;
ForeignServer *server;
UserMapping *user;
- List *param_numbers;
int numParams;
int i;
+ ListCell *lc;
/*
* Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL.
@@ -805,40 +918,42 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags)
/* Get info we'll need for data conversion. */
fsstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(fsstate->rel));
- /*
- * Allocate buffer for query parameters, if the remote conditions use any.
- *
- * We use a parameter slot for each PARAM_EXTERN parameter, even though
- * not all of them may get sent to the remote server. This allows us to
- * refer to Params by their original number rather than remapping, and it
- * doesn't cost much. Slots that are not actually used get filled with
- * null values that are arbitrarily marked as being of type int4.
- */
- param_numbers = (List *)
- list_nth(fsstate->fdw_private, FdwScanPrivateExternParamIds);
- if (param_numbers != NIL)
+ /* Prepare for output conversion of parameters used in remote query. */
+ numParams = list_length(fsplan->fdw_exprs);
+ fsstate->numParams = numParams;
+ fsstate->param_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * numParams);
+
+ i = 0;
+ foreach(lc, fsplan->fdw_exprs)
{
- ParamListInfo params = estate->es_param_list_info;
+ Node *param_expr = (Node *) lfirst(lc);
+ Oid typefnoid;
+ bool isvarlena;
- numParams = params ? params->numParams : 0;
+ getTypeOutputInfo(exprType(param_expr), &typefnoid, &isvarlena);
+ fmgr_info(typefnoid, &fsstate->param_flinfo[i]);
+ i++;
}
- else
- numParams = 0;
- fsstate->numParams = numParams;
+
+ /*
+ * Prepare remote-parameter expressions for evaluation. (Note: in
+ * practice, we expect that all these expressions will be just Params, so
+ * we could possibly do something more efficient than using the full
+ * expression-eval machinery for this. But probably there would be little
+ * benefit, and it'd require postgres_fdw to know more than is desirable
+ * about Param evaluation.)
+ */
+ fsstate->param_exprs = (List *)
+ ExecInitExpr((Expr *) fsplan->fdw_exprs,
+ (PlanState *) node);
+
+ /*
+ * Allocate buffer for text form of query parameters, if any.
+ */
if (numParams > 0)
- {
- /* we initially fill all slots with value = NULL, type = int4 */
- fsstate->param_types = (Oid *) palloc(numParams * sizeof(Oid));
fsstate->param_values = (const char **) palloc0(numParams * sizeof(char *));
- for (i = 0; i < numParams; i++)
- fsstate->param_types[i] = INT4OID;
- }
else
- {
- fsstate->param_types = NULL;
fsstate->param_values = NULL;
- }
- fsstate->extparams_done = false;
}
/*
@@ -894,11 +1009,6 @@ postgresReScanForeignScan(ForeignScanState *node)
char sql[64];
PGresult *res;
- /*
- * Note: we assume that PARAM_EXTERN params don't change over the life of
- * the query, so no need to reset extparams_done.
- */
-
/* If we haven't created the cursor yet, nothing to do. */
if (!fsstate->cursor_exists)
return;
@@ -1497,8 +1607,128 @@ postgresExplainForeignModify(ModifyTableState *mtstate,
}
}
+
+/*
+ * estimate_path_cost_size
+ * Get cost and size estimates for a foreign scan
+ *
+ * We assume that all the baserestrictinfo clauses will be applied, plus
+ * any join clauses listed in join_conds.
+ */
+static void
+estimate_path_cost_size(PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *join_conds,
+ double *p_rows, int *p_width,
+ Cost *p_startup_cost, Cost *p_total_cost)
+{
+ PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
+ double rows;
+ double retrieved_rows;
+ int width;
+ Cost startup_cost;
+ Cost total_cost;
+ Cost run_cost;
+ Cost cpu_per_tuple;
+
+ /*
+ * If the table or the server is configured to use remote estimates,
+ * connect to the foreign server and execute EXPLAIN to estimate the
+ * number of rows selected by the restriction+join clauses. Otherwise,
+ * estimate rows using whatever statistics we have locally, in a way
+ * similar to ordinary tables.
+ */
+ if (fpinfo->use_remote_estimate)
+ {
+ StringInfoData sql;
+ PGconn *conn;
+
+ /*
+ * Construct EXPLAIN query including the desired SELECT, FROM, and
+ * WHERE clauses. Params and other-relation Vars are replaced by
+ * dummy values.
+ */
+ initStringInfo(&sql);
+ appendStringInfoString(&sql, "EXPLAIN ");
+ deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used);
+ if (fpinfo->remote_conds)
+ appendWhereClause(&sql, root, baserel, fpinfo->remote_conds,
+ true, NULL);
+ if (join_conds)
+ appendWhereClause(&sql, root, baserel, join_conds,
+ (fpinfo->remote_conds == NIL), NULL);
+
+ /* Get the remote estimate */
+ conn = GetConnection(fpinfo->server, fpinfo->user, false);
+ get_remote_estimate(sql.data, conn, &rows, &width,
+ &startup_cost, &total_cost);
+ ReleaseConnection(conn);
+
+ retrieved_rows = rows;
+
+ /* Factor in the selectivity of the local_conds */
+ rows = clamp_row_est(rows * fpinfo->local_conds_sel);
+
+ /* Add in the eval cost of the local_conds */
+ startup_cost += fpinfo->local_conds_cost.startup;
+ total_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
+ }
+ else
+ {
+ /*
+ * We don't support join conditions in this mode (hence, no
+ * parameterized paths can be made).
+ */
+ Assert(join_conds == NIL);
+
+ /* Use rows/width estimates made by set_baserel_size_estimates. */
+ rows = baserel->rows;
+ width = baserel->width;
+
+ /*
+ * Back into an estimate of the number of retrieved rows. Just in
+ * case this is nuts, clamp to at most baserel->tuples.
+ */
+ retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
+ retrieved_rows = Min(retrieved_rows, baserel->tuples);
+
+ /*
+ * Cost as though this were a seqscan, which is pessimistic. We
+ * effectively imagine the local_conds are being evaluated remotely,
+ * too.
+ */
+ startup_cost = 0;
+ run_cost = 0;
+ run_cost += seq_page_cost * baserel->pages;
+
+ startup_cost += baserel->baserestrictcost.startup;
+ cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ total_cost = startup_cost + run_cost;
+ }
+
+ /*
+ * Add some additional cost factors to account for connection overhead
+ * (fdw_startup_cost), transferring data across the network
+ * (fdw_tuple_cost per retrieved row), and local manipulation of the data
+ * (cpu_tuple_cost per retrieved row).
+ */
+ startup_cost += fpinfo->fdw_startup_cost;
+ total_cost += fpinfo->fdw_startup_cost;
+ total_cost += fpinfo->fdw_tuple_cost * retrieved_rows;
+ total_cost += cpu_tuple_cost * retrieved_rows;
+
+ /* Return results. */
+ *p_rows = rows;
+ *p_width = width;
+ *p_startup_cost = startup_cost;
+ *p_total_cost = total_cost;
+}
+
/*
- * Estimate costs of executing given SQL statement.
+ * Estimate costs of executing a SQL statement remotely.
+ * The given "sql" must be an EXPLAIN command.
*/
static void
get_remote_estimate(const char *sql, PGconn *conn,
@@ -1510,19 +1740,16 @@ get_remote_estimate(const char *sql, PGconn *conn,
/* PGresult must be released before leaving this function. */
PG_TRY();
{
- StringInfoData buf;
char *line;
char *p;
int n;
/*
- * Execute EXPLAIN remotely on given SQL statement.
+ * Execute EXPLAIN remotely.
*/
- initStringInfo(&buf);
- appendStringInfo(&buf, "EXPLAIN %s", sql);
- res = PQexec(conn, buf.data);
+ res = PQexec(conn, sql);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
- pgfdw_report_error(ERROR, res, false, buf.data);
+ pgfdw_report_error(ERROR, res, false, sql);
/*
* Extract cost numbers for topmost plan node. Note we search for a
@@ -1551,14 +1778,45 @@ get_remote_estimate(const char *sql, PGconn *conn,
}
/*
+ * Detect whether we want to process an EquivalenceClass member.
+ *
+ * This is a callback for use by generate_implied_equalities_for_column.
+ */
+static bool
+ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
+ EquivalenceClass *ec, EquivalenceMember *em,
+ void *arg)
+{
+ ec_member_foreign_arg *state = (ec_member_foreign_arg *) arg;
+ Expr *expr = em->em_expr;
+
+ /*
+ * If we've identified what we're processing in the current scan, we only
+ * want to match that expression.
+ */
+ if (state->current != NULL)
+ return equal(expr, state->current);
+
+ /*
+ * Otherwise, ignore anything we've already processed.
+ */
+ if (list_member(state->already_used, expr))
+ return false;
+
+ /* This is the new target to process. */
+ state->current = expr;
+ return true;
+}
+
+/*
* Create cursor for node's query with current parameter values.
*/
static void
create_cursor(ForeignScanState *node)
{
PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state;
+ ExprContext *econtext = node->ss.ps.ps_ExprContext;
int numParams = fsstate->numParams;
- Oid *types = fsstate->param_types;
const char **values = fsstate->param_values;
PGconn *conn = fsstate->conn;
char *sql;
@@ -1566,69 +1824,46 @@ create_cursor(ForeignScanState *node)
PGresult *res;
/*
- * Construct array of external parameter values in text format. Since
- * there might be random unconvertible stuff in the ParamExternData array,
- * take care to convert only values we actually need.
- *
- * Note that we leak the memory for the value strings until end of query;
- * this doesn't seem like a big problem, and in any case we might need to
- * recreate the cursor after a rescan, so we could need to re-use the
- * values anyway.
+ * Construct array of query parameter values in text format. We do the
+ * conversions in the short-lived per-tuple context, so as not to cause a
+ * memory leak over repeated scans.
*/
- if (numParams > 0 && !fsstate->extparams_done)
+ if (numParams > 0)
{
- ParamListInfo params = node->ss.ps.state->es_param_list_info;
int nestlevel;
- List *param_numbers;
+ MemoryContext oldcontext;
+ int i;
ListCell *lc;
+ oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
+
nestlevel = set_transmission_modes();
- param_numbers = (List *)
- list_nth(fsstate->fdw_private, FdwScanPrivateExternParamIds);
- foreach(lc, param_numbers)
+ i = 0;
+ foreach(lc, fsstate->param_exprs)
{
- int paramno = lfirst_int(lc);
- ParamExternData *prm = &params->params[paramno - 1];
-
- /* give hook a chance in case parameter is dynamic */
- if (!OidIsValid(prm->ptype) && params->paramFetch != NULL)
- params->paramFetch(params, paramno);
+ ExprState *expr_state = (ExprState *) lfirst(lc);
+ Datum expr_value;
+ bool isNull;
- /*
- * Force the remote server to infer a type for this parameter.
- * Since we explicitly cast every parameter (see deparse.c), the
- * "inference" is trivial and will produce the desired result.
- * This allows us to avoid assuming that the remote server has the
- * same OIDs we do for the parameters' types.
- *
- * We'd not need to pass a type array to PQexecParams at all,
- * except that there may be unused holes in the array, which will
- * have to be filled with something or the remote server will
- * complain. We arbitrarily set them to INT4OID earlier.
- */
- types[paramno - 1] = InvalidOid;
+ /* Evaluate the parameter expression */
+ expr_value = ExecEvalExpr(expr_state, econtext, &isNull, NULL);
/*
* Get string representation of each parameter value by invoking
* type-specific output function, unless the value is null.
*/
- if (prm->isnull)
- values[paramno - 1] = NULL;
+ if (isNull)
+ values[i] = NULL;
else
- {
- Oid out_func;
- bool isvarlena;
-
- getTypeOutputInfo(prm->ptype, &out_func, &isvarlena);
- values[paramno - 1] = OidOutputFunctionCall(out_func,
- prm->value);
- }
+ values[i] = OutputFunctionCall(&fsstate->param_flinfo[i],
+ expr_value);
+ i++;
}
reset_transmission_modes(nestlevel);
- fsstate->extparams_done = true;
+ MemoryContextSwitchTo(oldcontext);
}
/* Construct the DECLARE CURSOR command */
@@ -1638,10 +1873,16 @@ create_cursor(ForeignScanState *node)
fsstate->cursor_number, sql);
/*
+ * Notice that we pass NULL for paramTypes, thus forcing the remote server
+ * to infer types for all parameters. Since we explicitly cast every
+ * parameter (see deparse.c), the "inference" is trivial and will produce
+ * the desired result. This allows us to avoid assuming that the remote
+ * server has the same OIDs we do for the parameters' types.
+ *
* We don't use a PG_TRY block here, so be careful not to throw error
* without releasing the PGresult.
*/
- res = PQexecParams(conn, buf.data, numParams, types, values,
+ res = PQexecParams(conn, buf.data, numParams, NULL, values,
NULL, NULL, 0);
if (PQresultStatus(res) != PGRES_COMMAND_OK)
pgfdw_report_error(ERROR, res, true, sql);