From 9cbc4b80ddc10b36c64514104caa69747c386dcf Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 21 Mar 2013 19:43:59 -0400 Subject: Redo postgres_fdw's planner code so it can handle parameterized paths. I wasn't going to ship this without having at least some example of how to do that. This version isn't terribly bright; in particular it won't consider any combinations of multiple join clauses. Given the cost of executing a remote EXPLAIN, I'm not sure we want to be very aggressive about doing that, anyway. In support of this, refactor generate_implied_equalities_for_indexcol so that it can be used to extract equivalence clauses that aren't necessarily tied to an index. --- contrib/postgres_fdw/postgres_fdw.c | 941 ++++++++++++++++++++++-------------- 1 file changed, 591 insertions(+), 350 deletions(-) (limited to 'contrib/postgres_fdw/postgres_fdw.c') diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 58dd12c575a..982a8d9a61f 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -23,10 +23,13 @@ #include "funcapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "parser/parsetree.h" #include "utils/builtins.h" @@ -49,28 +52,41 @@ PG_MODULE_MAGIC; */ typedef struct PgFdwRelationInfo { - /* XXX underdocumented, but a lot of this shouldn't be here anyway */ - StringInfoData sql; - Cost startup_cost; - Cost total_cost; + /* baserestrictinfo clauses, broken down into safe and unsafe subsets. */ List *remote_conds; - List *param_conds; List *local_conds; - List *param_numbers; + + /* Bitmap of attr numbers we need to fetch from the remote server. */ + Bitmapset *attrs_used; + + /* Cost and selectivity of local_conds. */ + QualCost local_conds_cost; + Selectivity local_conds_sel; + + /* Estimated size and cost for a scan with baserestrictinfo quals. */ + double rows; + int width; + Cost startup_cost; + Cost total_cost; + + /* Options extracted from catalogs. */ + bool use_remote_estimate; + Cost fdw_startup_cost; + Cost fdw_tuple_cost; /* Cached catalog information. */ ForeignTable *table; ForeignServer *server; + UserMapping *user; /* only set in use_remote_estimate mode */ } PgFdwRelationInfo; /* * Indexes of FDW-private information stored in fdw_private lists. * * We store various information in ForeignScan.fdw_private to pass it from - * planner to executor. Specifically there is: + * planner to executor. Currently there is just: * * 1) SELECT statement text to be sent to the remote server - * 2) IDs of PARAM_EXEC Params used in the SELECT statement * * These items are indexed with the enum FdwScanPrivateIndex, so an item * can be fetched with list_nth(). For example, to get the SELECT statement: @@ -79,9 +95,7 @@ typedef struct PgFdwRelationInfo enum FdwScanPrivateIndex { /* SQL statement to execute remotely (as a String node) */ - FdwScanPrivateSelectSql, - /* Integer list of param IDs of PARAM_EXEC Params used in SQL stmt */ - FdwScanPrivateExternParamIds + FdwScanPrivateSelectSql }; /* @@ -117,10 +131,10 @@ typedef struct PgFdwScanState PGconn *conn; /* connection for the scan */ unsigned int cursor_number; /* quasi-unique ID for my cursor */ bool cursor_exists; /* have we created the cursor? */ - bool extparams_done; /* have we converted PARAM_EXTERN params? */ int numParams; /* number of parameters passed to query */ - Oid *param_types; /* array of types of query parameters */ - const char **param_values; /* array of values of query parameters */ + FmgrInfo *param_flinfo; /* output conversion functions for them */ + List *param_exprs; /* executable expressions for param values */ + const char **param_values; /* textual values of query parameters */ /* for storing result tuples */ HeapTuple *tuples; /* array of currently-retrieved tuples */ @@ -194,6 +208,13 @@ typedef struct ConversionLocation AttrNumber cur_attno; /* attribute number being processed, or 0 */ } ConversionLocation; +/* Callback argument for ec_member_matches_foreign */ +typedef struct +{ + Expr *current; /* current expr, or NULL if not yet found */ + List *already_used; /* expressions already dealt with */ +} ec_member_foreign_arg; + /* * SQL functions */ @@ -260,12 +281,20 @@ static bool postgresAnalyzeForeignTable(Relation relation, /* * Helper functions */ +static void estimate_path_cost_size(PlannerInfo *root, + RelOptInfo *baserel, + List *join_conds, + double *p_rows, int *p_width, + Cost *p_startup_cost, Cost *p_total_cost); static void get_remote_estimate(const char *sql, PGconn *conn, double *rows, int *width, Cost *startup_cost, Cost *total_cost); +static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg); static void create_cursor(ForeignScanState *node); static void fetch_more_data(ForeignScanState *node); static void close_cursor(PGconn *conn, unsigned int cursor_number); @@ -330,38 +359,15 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) * postgresGetForeignRelSize * Estimate # of rows and width of the result of the scan * - * Here we estimate number of rows returned by the scan in two steps. In the - * first step, we execute remote EXPLAIN command to obtain the number of rows - * returned from remote side. In the second step, we calculate the selectivity - * of the filtering done on local side, and modify first estimate. - * - * We have to get some catalog objects and generate remote query string here, - * so we store such expensive information in FDW private area of RelOptInfo and - * pass them to subsequent functions for reuse. + * We should consider the effect of all baserestrictinfo clauses here, but + * not any join clauses. */ static void postgresGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid) { - bool use_remote_estimate = false; PgFdwRelationInfo *fpinfo; - StringInfo sql; - ForeignTable *table; - ForeignServer *server; - Selectivity sel; - double rows; - int width; - Cost startup_cost; - Cost total_cost; - Cost run_cost; - QualCost qpqual_cost; - Cost cpu_per_tuple; - List *remote_conds; - List *param_conds; - List *local_conds; - List *param_numbers; - Bitmapset *attrs_used; ListCell *lc; /* @@ -369,127 +375,122 @@ postgresGetForeignRelSize(PlannerInfo *root, * functions. */ fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo)); - initStringInfo(&fpinfo->sql); - sql = &fpinfo->sql; + baserel->fdw_private = (void *) fpinfo; + + /* Look up foreign-table catalog info. */ + fpinfo->table = GetForeignTable(foreigntableid); + fpinfo->server = GetForeignServer(fpinfo->table->serverid); /* - * Determine whether we use remote estimate or not. Note that per-table - * setting overrides per-server setting. + * Extract user-settable option values. Note that per-table setting of + * use_remote_estimate overrides per-server setting. */ - table = GetForeignTable(foreigntableid); - server = GetForeignServer(table->serverid); - foreach(lc, server->options) + fpinfo->use_remote_estimate = false; + fpinfo->fdw_startup_cost = DEFAULT_FDW_STARTUP_COST; + fpinfo->fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST; + + foreach(lc, fpinfo->server->options) { DefElem *def = (DefElem *) lfirst(lc); if (strcmp(def->defname, "use_remote_estimate") == 0) - { - use_remote_estimate = defGetBoolean(def); - break; - } + fpinfo->use_remote_estimate = defGetBoolean(def); + else if (strcmp(def->defname, "fdw_startup_cost") == 0) + fpinfo->fdw_startup_cost = strtod(defGetString(def), NULL); + else if (strcmp(def->defname, "fdw_tuple_cost") == 0) + fpinfo->fdw_tuple_cost = strtod(defGetString(def), NULL); } - foreach(lc, table->options) + foreach(lc, fpinfo->table->options) { DefElem *def = (DefElem *) lfirst(lc); if (strcmp(def->defname, "use_remote_estimate") == 0) { - use_remote_estimate = defGetBoolean(def); - break; + fpinfo->use_remote_estimate = defGetBoolean(def); + break; /* only need the one value */ } } /* - * Identify which restriction clauses can be sent to the remote server and - * which can't. Conditions that are remotely executable but contain - * PARAM_EXTERN Params have to be treated separately because we can't use - * placeholders in remote EXPLAIN. + * If the table or the server is configured to use remote estimates, + * identify which user to do remote access as during planning. This + * should match what ExecCheckRTEPerms() does. If we fail due to lack of + * permissions, the query would have failed at runtime anyway. + */ + if (fpinfo->use_remote_estimate) + { + RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root); + Oid userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); + + fpinfo->user = GetUserMapping(userid, fpinfo->server->serverid); + } + else + fpinfo->user = NULL; + + /* + * Identify which baserestrictinfo clauses can be sent to the remote + * server and which can't. */ - classifyConditions(root, baserel, &remote_conds, ¶m_conds, - &local_conds, ¶m_numbers); + classifyConditions(root, baserel, + &fpinfo->remote_conds, &fpinfo->local_conds); /* * Identify which attributes will need to be retrieved from the remote - * server. These include all attrs needed for joins or final output, plus - * all attrs used in the local_conds. + * server. These include all attrs needed for joins or final output, plus + * all attrs used in the local_conds. (Note: if we end up using a + * parameterized scan, it's possible that some of the join clauses will be + * sent to the remote and thus we wouldn't really need to retrieve the + * columns used in them. Doesn't seem worth detecting that case though.) */ - attrs_used = NULL; + fpinfo->attrs_used = NULL; pull_varattnos((Node *) baserel->reltargetlist, baserel->relid, - &attrs_used); - foreach(lc, local_conds) + &fpinfo->attrs_used); + foreach(lc, fpinfo->local_conds) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); pull_varattnos((Node *) rinfo->clause, baserel->relid, - &attrs_used); + &fpinfo->attrs_used); } /* - * Construct remote query which consists of SELECT, FROM, and WHERE - * clauses. For now, leave out the param_conds. + * Compute the selectivity and cost of the local_conds, so we don't have + * to do it over again for each path. The best we can do for these + * conditions is to estimate selectivity on the basis of local statistics. */ - deparseSelectSql(sql, root, baserel, attrs_used); - if (remote_conds) - appendWhereClause(sql, root, remote_conds, true); + fpinfo->local_conds_sel = clauselist_selectivity(root, + fpinfo->local_conds, + baserel->relid, + JOIN_INNER, + NULL); + + cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root); /* * If the table or the server is configured to use remote estimates, - * connect to the foreign server and execute EXPLAIN with the quals that - * don't contain any Param nodes. Otherwise, estimate rows using whatever - * statistics we have locally, in a way similar to ordinary tables. + * connect to the foreign server and execute EXPLAIN to estimate the + * number of rows selected by the restriction clauses, as well as the + * average row width. Otherwise, estimate using whatever statistics we + * have locally, in a way similar to ordinary tables. */ - if (use_remote_estimate) + if (fpinfo->use_remote_estimate) { - RangeTblEntry *rte; - Oid userid; - UserMapping *user; - PGconn *conn; - /* - * Identify which user to do the remote access as. This should match - * what ExecCheckRTEPerms() does. If we fail due to lack of - * permissions, the query would have failed at runtime anyway. + * Get cost/size estimates with help of remote server. Save the + * values in fpinfo so we don't need to do it again to generate the + * basic foreign path. */ - rte = planner_rt_fetch(baserel->relid, root); - userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); + estimate_path_cost_size(root, baserel, NIL, + &fpinfo->rows, &fpinfo->width, + &fpinfo->startup_cost, &fpinfo->total_cost); - user = GetUserMapping(userid, server->serverid); - conn = GetConnection(server, user, false); - get_remote_estimate(sql->data, conn, &rows, &width, - &startup_cost, &total_cost); - ReleaseConnection(conn); - - /* - * Estimate selectivity of conditions which were not used in remote - * EXPLAIN by calling clauselist_selectivity(). The best we can do - * for these conditions is to estimate selectivity on the basis of - * local statistics. - */ - sel = clauselist_selectivity(root, param_conds, - baserel->relid, JOIN_INNER, NULL); - sel *= clauselist_selectivity(root, local_conds, - baserel->relid, JOIN_INNER, NULL); - - /* - * Add in the eval cost of those conditions, too. - */ - cost_qual_eval(&qpqual_cost, param_conds, root); - startup_cost += qpqual_cost.startup; - total_cost += qpqual_cost.per_tuple * rows; - cost_qual_eval(&qpqual_cost, local_conds, root); - startup_cost += qpqual_cost.startup; - total_cost += qpqual_cost.per_tuple * rows; - - /* Report estimated numbers to planner. */ - baserel->rows = clamp_row_est(rows * sel); - baserel->width = width; + /* Report estimated baserel size to planner. */ + baserel->rows = fpinfo->rows; + baserel->width = fpinfo->width; } else { /* - * Estimate rows from the result of the last ANALYZE, using all - * conditions specified in original query. - * * If the foreign table has never been ANALYZEd, it will have relpages * and reltuples equal to zero, which most likely has nothing to do * with reality. We can't do a whole lot about that if we're not @@ -505,83 +506,14 @@ postgresGetForeignRelSize(PlannerInfo *root, (10 * BLCKSZ) / (baserel->width + sizeof(HeapTupleHeaderData)); } + /* Estimate baserel size as best we can with local statistics. */ set_baserel_size_estimates(root, baserel); - /* Cost as though this were a seqscan, which is pessimistic. */ - startup_cost = 0; - run_cost = 0; - run_cost += seq_page_cost * baserel->pages; - - startup_cost += baserel->baserestrictcost.startup; - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; - run_cost += cpu_per_tuple * baserel->tuples; - - total_cost = startup_cost + run_cost; + /* Fill in basically-bogus cost estimates for use later. */ + estimate_path_cost_size(root, baserel, NIL, + &fpinfo->rows, &fpinfo->width, + &fpinfo->startup_cost, &fpinfo->total_cost); } - - /* - * Finish deparsing remote query by adding conditions which were unusable - * in remote EXPLAIN because they contain Param nodes. - */ - if (param_conds) - appendWhereClause(sql, root, param_conds, (remote_conds == NIL)); - - /* - * Add FOR UPDATE/SHARE if appropriate. We apply locking during the - * initial row fetch, rather than later on as is done for local tables. - * The extra roundtrips involved in trying to duplicate the local - * semantics exactly don't seem worthwhile (see also comments for - * RowMarkType). - */ - if (baserel->relid == root->parse->resultRelation && - (root->parse->commandType == CMD_UPDATE || - root->parse->commandType == CMD_DELETE)) - { - /* Relation is UPDATE/DELETE target, so use FOR UPDATE */ - appendStringInfo(sql, " FOR UPDATE"); - } - else - { - RowMarkClause *rc = get_parse_rowmark(root->parse, baserel->relid); - - if (rc) - { - /* - * Relation is specified as a FOR UPDATE/SHARE target, so handle - * that. - * - * For now, just ignore any [NO] KEY specification, since (a) it's - * not clear what that means for a remote table that we don't have - * complete information about, and (b) it wouldn't work anyway on - * older remote servers. Likewise, we don't worry about NOWAIT. - */ - switch (rc->strength) - { - case LCS_FORKEYSHARE: - case LCS_FORSHARE: - appendStringInfo(sql, " FOR SHARE"); - break; - case LCS_FORNOKEYUPDATE: - case LCS_FORUPDATE: - appendStringInfo(sql, " FOR UPDATE"); - break; - } - } - } - - /* - * Store obtained information into FDW-private area of RelOptInfo so it's - * available to subsequent functions. - */ - fpinfo->startup_cost = startup_cost; - fpinfo->total_cost = total_cost; - fpinfo->remote_conds = remote_conds; - fpinfo->param_conds = param_conds; - fpinfo->local_conds = local_conds; - fpinfo->param_numbers = param_numbers; - fpinfo->table = table; - fpinfo->server = server; - baserel->fdw_private = (void *) fpinfo; } /* @@ -595,77 +527,187 @@ postgresGetForeignPaths(PlannerInfo *root, { PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; ForeignPath *path; - ListCell *lc; - double fdw_startup_cost = DEFAULT_FDW_STARTUP_COST; - double fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST; + Relids lateral_referencers; + List *join_quals; + Relids required_outer; + double rows; + int width; Cost startup_cost; Cost total_cost; - List *fdw_private; + ListCell *lc; /* - * Check for user override of fdw_startup_cost, fdw_tuple_cost values + * Create simplest ForeignScan path node and add it to baserel. This path + * corresponds to SeqScan path of regular tables (though depending on what + * baserestrict conditions we were able to send to remote, there might + * actually be an indexscan happening there). We already did all the work + * to estimate cost and size of this path. */ - foreach(lc, fpinfo->server->options) - { - DefElem *d = (DefElem *) lfirst(lc); - - if (strcmp(d->defname, "fdw_startup_cost") == 0) - fdw_startup_cost = strtod(defGetString(d), NULL); - else if (strcmp(d->defname, "fdw_tuple_cost") == 0) - fdw_tuple_cost = strtod(defGetString(d), NULL); - } + path = create_foreignscan_path(root, baserel, + fpinfo->rows, + fpinfo->startup_cost, + fpinfo->total_cost, + NIL, /* no pathkeys */ + NULL, /* no outer rel either */ + NIL); /* no fdw_private list */ + add_path(baserel, (Path *) path); /* - * We have cost values which are estimated on remote side, so adjust them - * for better estimate which respect various stuffs to complete the scan, - * such as sending query, transferring result, and local filtering. - */ - startup_cost = fpinfo->startup_cost; - total_cost = fpinfo->total_cost; - - /*---------- - * Adjust costs with factors of the corresponding foreign server: - * - add cost to establish connection to both startup and total - * - add cost to manipulate on remote, and transfer result to total - * - add cost to manipulate tuples on local side to total - *---------- + * If we're not using remote estimates, stop here. We have no way to + * estimate whether any join clauses would be worth sending across, so + * don't bother building parameterized paths. */ - startup_cost += fdw_startup_cost; - total_cost += fdw_startup_cost; - total_cost += fdw_tuple_cost * baserel->rows; - total_cost += cpu_tuple_cost * baserel->rows; + if (!fpinfo->use_remote_estimate) + return; /* - * Build the fdw_private list that will be available to the executor. - * Items in the list must match enum FdwScanPrivateIndex, above. + * As a crude first hack, we consider each available join clause and try + * to make a parameterized path using just that clause. Later we should + * consider combinations of clauses, probably. */ - fdw_private = list_make2(makeString(fpinfo->sql.data), - fpinfo->param_numbers); /* - * Create simplest ForeignScan path node and add it to baserel. This path - * corresponds to SeqScan path of regular tables (though depending on what - * baserestrict conditions we were able to send to remote, there might - * actually be an indexscan happening there). + * If there are any rels that have LATERAL references to this one, we + * cannot use join quals referencing them as remote quals for this one, + * since such rels would have to be on the inside not the outside of a + * nestloop join relative to this one. Create a Relids set listing all + * such rels, for use in checks of potential join clauses. */ - path = create_foreignscan_path(root, baserel, - baserel->rows, - startup_cost, - total_cost, - NIL, /* no pathkeys */ - NULL, /* no outer rel either */ - fdw_private); - add_path(baserel, (Path *) path); + lateral_referencers = NULL; + foreach(lc, root->lateral_info_list) + { + LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc); + + if (bms_is_member(baserel->relid, ljinfo->lateral_lhs)) + lateral_referencers = bms_add_member(lateral_referencers, + ljinfo->lateral_rhs); + } + + /* Scan the rel's join clauses */ + foreach(lc, baserel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Check if clause can be moved to this rel */ + if (!join_clause_is_movable_to(rinfo, baserel->relid)) + continue; + + /* Not useful if it conflicts with any LATERAL references */ + if (bms_overlap(rinfo->clause_relids, lateral_referencers)) + continue; + + /* See if it is safe to send to remote */ + if (!is_foreign_expr(root, baserel, rinfo->clause)) + continue; + + /* + * OK, get a cost estimate from the remote, and make a path. + */ + join_quals = list_make1(rinfo); + estimate_path_cost_size(root, baserel, join_quals, + &rows, &width, + &startup_cost, &total_cost); + + /* Must calculate required outer rels for this path */ + required_outer = bms_union(rinfo->clause_relids, + baserel->lateral_relids); + /* We do not want the foreign rel itself listed in required_outer */ + required_outer = bms_del_member(required_outer, baserel->relid); + /* Enforce convention that required_outer is exactly NULL if empty */ + if (bms_is_empty(required_outer)) + required_outer = NULL; + + path = create_foreignscan_path(root, baserel, + rows, + startup_cost, + total_cost, + NIL, /* no pathkeys */ + required_outer, + NIL); /* no fdw_private list */ + add_path(baserel, (Path *) path); + } /* - * XXX We can consider sorted path or parameterized path here if we know - * that foreign table is indexed on remote end. For this purpose, we - * might have to support FOREIGN INDEX to represent possible sets of sort - * keys and/or filtering. Or we could just try some join conditions and - * see if remote side estimates using them as markedly cheaper. Note that - * executor functions need work to support internal Params before we can - * try generating any parameterized paths, though. + * The above scan examined only "generic" join clauses, not those that + * were absorbed into EquivalenceClauses. See if we can make anything out + * of EquivalenceClauses. */ + if (baserel->has_eclass_joins) + { + /* + * We repeatedly scan the eclass list looking for column references + * (or expressions) belonging to the foreign rel. Each time we find + * one, we generate a list of equivalence joinclauses for it, and then + * try to make those into foreign paths. Repeat till there are no + * more candidate EC members. + */ + ec_member_foreign_arg arg; + + arg.already_used = NIL; + for (;;) + { + List *clauses; + + /* Make clauses, skipping any that join to lateral_referencers */ + arg.current = NULL; + clauses = generate_implied_equalities_for_column(root, + baserel, + ec_member_matches_foreign, + (void *) &arg, + lateral_referencers); + + /* Done if there are no more expressions in the foreign rel */ + if (arg.current == NULL) + { + Assert(clauses == NIL); + break; + } + + /* Scan the extracted join clauses */ + foreach(lc, clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Check if clause can be moved to this rel */ + if (!join_clause_is_movable_to(rinfo, baserel->relid)) + continue; + + /* Shouldn't conflict with any LATERAL references */ + Assert(!bms_overlap(rinfo->clause_relids, lateral_referencers)); + + /* See if it is safe to send to remote */ + if (!is_foreign_expr(root, baserel, rinfo->clause)) + continue; + + /* + * OK, get a cost estimate from the remote, and make a path. + */ + join_quals = list_make1(rinfo); + estimate_path_cost_size(root, baserel, join_quals, + &rows, &width, + &startup_cost, &total_cost); + + /* Must calculate required outer rels for this path */ + required_outer = bms_union(rinfo->clause_relids, + baserel->lateral_relids); + required_outer = bms_del_member(required_outer, baserel->relid); + if (bms_is_empty(required_outer)) + required_outer = NULL; + + path = create_foreignscan_path(root, baserel, + rows, + startup_cost, + total_cost, + NIL, /* no pathkeys */ + required_outer, + NIL); /* no fdw_private */ + add_path(baserel, (Path *) path); + } + + /* Try again, now ignoring the expression we found this time */ + arg.already_used = lappend(arg.already_used, arg.current); + } + } } /* @@ -682,19 +724,32 @@ postgresGetForeignPlan(PlannerInfo *root, { PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; Index scan_relid = baserel->relid; - List *fdw_private = best_path->fdw_private; - List *remote_exprs = NIL; + List *fdw_private; + List *remote_conds = NIL; List *local_exprs = NIL; + List *params_list = NIL; + StringInfoData sql; ListCell *lc; /* * Separate the scan_clauses into those that can be executed remotely and - * those that can't. For now, we accept only remote clauses that were - * previously determined to be safe by classifyClauses (so, only - * baserestrictinfo clauses can be used that way). + * those that can't. baserestrictinfo clauses that were previously + * determined to be safe or unsafe by classifyClauses are shown in + * fpinfo->remote_conds and fpinfo->local_conds. Anything else in the + * scan_clauses list should be a join clause that was found safe by + * postgresGetForeignPaths. + * + * Note: for clauses extracted from EquivalenceClasses, it's possible that + * what we get here is a different representation of the clause than what + * postgresGetForeignPaths saw; for example we might get a commuted + * version of the clause. So we can't insist on simple equality as we do + * for the baserestrictinfo clauses. * * This code must match "extract_actual_clauses(scan_clauses, false)" * except for the additional decision about remote versus local execution. + * Note however that we only strip the RestrictInfo nodes from the + * local_exprs list, since appendWhereClause expects a list of + * RestrictInfos. */ foreach(lc, scan_clauses) { @@ -706,30 +761,88 @@ postgresGetForeignPlan(PlannerInfo *root, if (rinfo->pseudoconstant) continue; - /* Either simple or parameterized remote clauses are OK now */ - if (list_member_ptr(fpinfo->remote_conds, rinfo) || - list_member_ptr(fpinfo->param_conds, rinfo)) - remote_exprs = lappend(remote_exprs, rinfo->clause); - else + if (list_member_ptr(fpinfo->remote_conds, rinfo)) + remote_conds = lappend(remote_conds, rinfo); + else if (list_member_ptr(fpinfo->local_conds, rinfo)) local_exprs = lappend(local_exprs, rinfo->clause); + else + { + Assert(is_foreign_expr(root, baserel, rinfo->clause)); + remote_conds = lappend(remote_conds, rinfo); + } } + /* + * Build the query string to be sent for execution, and identify + * expressions to be sent as parameters. + */ + initStringInfo(&sql); + deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used); + if (remote_conds) + appendWhereClause(&sql, root, baserel, remote_conds, + true, ¶ms_list); + + /* + * Add FOR UPDATE/SHARE if appropriate. We apply locking during the + * initial row fetch, rather than later on as is done for local tables. + * The extra roundtrips involved in trying to duplicate the local + * semantics exactly don't seem worthwhile (see also comments for + * RowMarkType). + */ + if (baserel->relid == root->parse->resultRelation && + (root->parse->commandType == CMD_UPDATE || + root->parse->commandType == CMD_DELETE)) + { + /* Relation is UPDATE/DELETE target, so use FOR UPDATE */ + appendStringInfo(&sql, " FOR UPDATE"); + } + else + { + RowMarkClause *rc = get_parse_rowmark(root->parse, baserel->relid); + + if (rc) + { + /* + * Relation is specified as a FOR UPDATE/SHARE target, so handle + * that. + * + * For now, just ignore any [NO] KEY specification, since (a) it's + * not clear what that means for a remote table that we don't have + * complete information about, and (b) it wouldn't work anyway on + * older remote servers. Likewise, we don't worry about NOWAIT. + */ + switch (rc->strength) + { + case LCS_FORKEYSHARE: + case LCS_FORSHARE: + appendStringInfo(&sql, " FOR SHARE"); + break; + case LCS_FORNOKEYUPDATE: + case LCS_FORUPDATE: + appendStringInfo(&sql, " FOR UPDATE"); + break; + } + } + } + + /* + * Build the fdw_private list that will be available to the executor. + * Items in the list must match enum FdwScanPrivateIndex, above. + */ + fdw_private = list_make1(makeString(sql.data)); + /* * Create the ForeignScan node from target list, local filtering - * expressions, remote filtering expressions, and FDW private information. - * - * Note that the remote_exprs are stored in the fdw_exprs field of the - * finished plan node; we can't keep them in private state because then - * they wouldn't be subject to later planner processing. + * expressions, remote parameter expressions, and FDW private information. * - * XXX Currently, the remote_exprs aren't actually used at runtime, so we - * don't need to store them at all. But we'll keep this behavior for a - * little while for debugging reasons. + * Note that the remote parameter expressions are stored in the fdw_exprs + * field of the finished plan node; we can't keep them in private state + * because then they wouldn't be subject to later planner processing. */ return make_foreignscan(tlist, local_exprs, scan_relid, - remote_exprs, + params_list, fdw_private); } @@ -748,9 +861,9 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) ForeignTable *table; ForeignServer *server; UserMapping *user; - List *param_numbers; int numParams; int i; + ListCell *lc; /* * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL. @@ -805,40 +918,42 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) /* Get info we'll need for data conversion. */ fsstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(fsstate->rel)); - /* - * Allocate buffer for query parameters, if the remote conditions use any. - * - * We use a parameter slot for each PARAM_EXTERN parameter, even though - * not all of them may get sent to the remote server. This allows us to - * refer to Params by their original number rather than remapping, and it - * doesn't cost much. Slots that are not actually used get filled with - * null values that are arbitrarily marked as being of type int4. - */ - param_numbers = (List *) - list_nth(fsstate->fdw_private, FdwScanPrivateExternParamIds); - if (param_numbers != NIL) + /* Prepare for output conversion of parameters used in remote query. */ + numParams = list_length(fsplan->fdw_exprs); + fsstate->numParams = numParams; + fsstate->param_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * numParams); + + i = 0; + foreach(lc, fsplan->fdw_exprs) { - ParamListInfo params = estate->es_param_list_info; + Node *param_expr = (Node *) lfirst(lc); + Oid typefnoid; + bool isvarlena; - numParams = params ? params->numParams : 0; + getTypeOutputInfo(exprType(param_expr), &typefnoid, &isvarlena); + fmgr_info(typefnoid, &fsstate->param_flinfo[i]); + i++; } - else - numParams = 0; - fsstate->numParams = numParams; + + /* + * Prepare remote-parameter expressions for evaluation. (Note: in + * practice, we expect that all these expressions will be just Params, so + * we could possibly do something more efficient than using the full + * expression-eval machinery for this. But probably there would be little + * benefit, and it'd require postgres_fdw to know more than is desirable + * about Param evaluation.) + */ + fsstate->param_exprs = (List *) + ExecInitExpr((Expr *) fsplan->fdw_exprs, + (PlanState *) node); + + /* + * Allocate buffer for text form of query parameters, if any. + */ if (numParams > 0) - { - /* we initially fill all slots with value = NULL, type = int4 */ - fsstate->param_types = (Oid *) palloc(numParams * sizeof(Oid)); fsstate->param_values = (const char **) palloc0(numParams * sizeof(char *)); - for (i = 0; i < numParams; i++) - fsstate->param_types[i] = INT4OID; - } else - { - fsstate->param_types = NULL; fsstate->param_values = NULL; - } - fsstate->extparams_done = false; } /* @@ -894,11 +1009,6 @@ postgresReScanForeignScan(ForeignScanState *node) char sql[64]; PGresult *res; - /* - * Note: we assume that PARAM_EXTERN params don't change over the life of - * the query, so no need to reset extparams_done. - */ - /* If we haven't created the cursor yet, nothing to do. */ if (!fsstate->cursor_exists) return; @@ -1497,8 +1607,128 @@ postgresExplainForeignModify(ModifyTableState *mtstate, } } + +/* + * estimate_path_cost_size + * Get cost and size estimates for a foreign scan + * + * We assume that all the baserestrictinfo clauses will be applied, plus + * any join clauses listed in join_conds. + */ +static void +estimate_path_cost_size(PlannerInfo *root, + RelOptInfo *baserel, + List *join_conds, + double *p_rows, int *p_width, + Cost *p_startup_cost, Cost *p_total_cost) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; + double rows; + double retrieved_rows; + int width; + Cost startup_cost; + Cost total_cost; + Cost run_cost; + Cost cpu_per_tuple; + + /* + * If the table or the server is configured to use remote estimates, + * connect to the foreign server and execute EXPLAIN to estimate the + * number of rows selected by the restriction+join clauses. Otherwise, + * estimate rows using whatever statistics we have locally, in a way + * similar to ordinary tables. + */ + if (fpinfo->use_remote_estimate) + { + StringInfoData sql; + PGconn *conn; + + /* + * Construct EXPLAIN query including the desired SELECT, FROM, and + * WHERE clauses. Params and other-relation Vars are replaced by + * dummy values. + */ + initStringInfo(&sql); + appendStringInfoString(&sql, "EXPLAIN "); + deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used); + if (fpinfo->remote_conds) + appendWhereClause(&sql, root, baserel, fpinfo->remote_conds, + true, NULL); + if (join_conds) + appendWhereClause(&sql, root, baserel, join_conds, + (fpinfo->remote_conds == NIL), NULL); + + /* Get the remote estimate */ + conn = GetConnection(fpinfo->server, fpinfo->user, false); + get_remote_estimate(sql.data, conn, &rows, &width, + &startup_cost, &total_cost); + ReleaseConnection(conn); + + retrieved_rows = rows; + + /* Factor in the selectivity of the local_conds */ + rows = clamp_row_est(rows * fpinfo->local_conds_sel); + + /* Add in the eval cost of the local_conds */ + startup_cost += fpinfo->local_conds_cost.startup; + total_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows; + } + else + { + /* + * We don't support join conditions in this mode (hence, no + * parameterized paths can be made). + */ + Assert(join_conds == NIL); + + /* Use rows/width estimates made by set_baserel_size_estimates. */ + rows = baserel->rows; + width = baserel->width; + + /* + * Back into an estimate of the number of retrieved rows. Just in + * case this is nuts, clamp to at most baserel->tuples. + */ + retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel); + retrieved_rows = Min(retrieved_rows, baserel->tuples); + + /* + * Cost as though this were a seqscan, which is pessimistic. We + * effectively imagine the local_conds are being evaluated remotely, + * too. + */ + startup_cost = 0; + run_cost = 0; + run_cost += seq_page_cost * baserel->pages; + + startup_cost += baserel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + total_cost = startup_cost + run_cost; + } + + /* + * Add some additional cost factors to account for connection overhead + * (fdw_startup_cost), transferring data across the network + * (fdw_tuple_cost per retrieved row), and local manipulation of the data + * (cpu_tuple_cost per retrieved row). + */ + startup_cost += fpinfo->fdw_startup_cost; + total_cost += fpinfo->fdw_startup_cost; + total_cost += fpinfo->fdw_tuple_cost * retrieved_rows; + total_cost += cpu_tuple_cost * retrieved_rows; + + /* Return results. */ + *p_rows = rows; + *p_width = width; + *p_startup_cost = startup_cost; + *p_total_cost = total_cost; +} + /* - * Estimate costs of executing given SQL statement. + * Estimate costs of executing a SQL statement remotely. + * The given "sql" must be an EXPLAIN command. */ static void get_remote_estimate(const char *sql, PGconn *conn, @@ -1510,19 +1740,16 @@ get_remote_estimate(const char *sql, PGconn *conn, /* PGresult must be released before leaving this function. */ PG_TRY(); { - StringInfoData buf; char *line; char *p; int n; /* - * Execute EXPLAIN remotely on given SQL statement. + * Execute EXPLAIN remotely. */ - initStringInfo(&buf); - appendStringInfo(&buf, "EXPLAIN %s", sql); - res = PQexec(conn, buf.data); + res = PQexec(conn, sql); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, false, buf.data); + pgfdw_report_error(ERROR, res, false, sql); /* * Extract cost numbers for topmost plan node. Note we search for a @@ -1550,6 +1777,37 @@ get_remote_estimate(const char *sql, PGconn *conn, PG_END_TRY(); } +/* + * Detect whether we want to process an EquivalenceClass member. + * + * This is a callback for use by generate_implied_equalities_for_column. + */ +static bool +ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg) +{ + ec_member_foreign_arg *state = (ec_member_foreign_arg *) arg; + Expr *expr = em->em_expr; + + /* + * If we've identified what we're processing in the current scan, we only + * want to match that expression. + */ + if (state->current != NULL) + return equal(expr, state->current); + + /* + * Otherwise, ignore anything we've already processed. + */ + if (list_member(state->already_used, expr)) + return false; + + /* This is the new target to process. */ + state->current = expr; + return true; +} + /* * Create cursor for node's query with current parameter values. */ @@ -1557,8 +1815,8 @@ static void create_cursor(ForeignScanState *node) { PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; + ExprContext *econtext = node->ss.ps.ps_ExprContext; int numParams = fsstate->numParams; - Oid *types = fsstate->param_types; const char **values = fsstate->param_values; PGconn *conn = fsstate->conn; char *sql; @@ -1566,69 +1824,46 @@ create_cursor(ForeignScanState *node) PGresult *res; /* - * Construct array of external parameter values in text format. Since - * there might be random unconvertible stuff in the ParamExternData array, - * take care to convert only values we actually need. - * - * Note that we leak the memory for the value strings until end of query; - * this doesn't seem like a big problem, and in any case we might need to - * recreate the cursor after a rescan, so we could need to re-use the - * values anyway. + * Construct array of query parameter values in text format. We do the + * conversions in the short-lived per-tuple context, so as not to cause a + * memory leak over repeated scans. */ - if (numParams > 0 && !fsstate->extparams_done) + if (numParams > 0) { - ParamListInfo params = node->ss.ps.state->es_param_list_info; int nestlevel; - List *param_numbers; + MemoryContext oldcontext; + int i; ListCell *lc; + oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + nestlevel = set_transmission_modes(); - param_numbers = (List *) - list_nth(fsstate->fdw_private, FdwScanPrivateExternParamIds); - foreach(lc, param_numbers) + i = 0; + foreach(lc, fsstate->param_exprs) { - int paramno = lfirst_int(lc); - ParamExternData *prm = ¶ms->params[paramno - 1]; - - /* give hook a chance in case parameter is dynamic */ - if (!OidIsValid(prm->ptype) && params->paramFetch != NULL) - params->paramFetch(params, paramno); + ExprState *expr_state = (ExprState *) lfirst(lc); + Datum expr_value; + bool isNull; - /* - * Force the remote server to infer a type for this parameter. - * Since we explicitly cast every parameter (see deparse.c), the - * "inference" is trivial and will produce the desired result. - * This allows us to avoid assuming that the remote server has the - * same OIDs we do for the parameters' types. - * - * We'd not need to pass a type array to PQexecParams at all, - * except that there may be unused holes in the array, which will - * have to be filled with something or the remote server will - * complain. We arbitrarily set them to INT4OID earlier. - */ - types[paramno - 1] = InvalidOid; + /* Evaluate the parameter expression */ + expr_value = ExecEvalExpr(expr_state, econtext, &isNull, NULL); /* * Get string representation of each parameter value by invoking * type-specific output function, unless the value is null. */ - if (prm->isnull) - values[paramno - 1] = NULL; + if (isNull) + values[i] = NULL; else - { - Oid out_func; - bool isvarlena; - - getTypeOutputInfo(prm->ptype, &out_func, &isvarlena); - values[paramno - 1] = OidOutputFunctionCall(out_func, - prm->value); - } + values[i] = OutputFunctionCall(&fsstate->param_flinfo[i], + expr_value); + i++; } reset_transmission_modes(nestlevel); - fsstate->extparams_done = true; + MemoryContextSwitchTo(oldcontext); } /* Construct the DECLARE CURSOR command */ @@ -1638,10 +1873,16 @@ create_cursor(ForeignScanState *node) fsstate->cursor_number, sql); /* + * Notice that we pass NULL for paramTypes, thus forcing the remote server + * to infer types for all parameters. Since we explicitly cast every + * parameter (see deparse.c), the "inference" is trivial and will produce + * the desired result. This allows us to avoid assuming that the remote + * server has the same OIDs we do for the parameters' types. + * * We don't use a PG_TRY block here, so be careful not to throw error * without releasing the PGresult. */ - res = PQexecParams(conn, buf.data, numParams, types, values, + res = PQexecParams(conn, buf.data, numParams, NULL, values, NULL, NULL, 0); if (PQresultStatus(res) != PGRES_COMMAND_OK) pgfdw_report_error(ERROR, res, true, sql); -- cgit v1.2.3