diff options
26 files changed, 1316 insertions, 68 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index b66caa4c082..f494ec98e51 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -73,6 +73,11 @@ static void show_upper_qual(List *qual, const char *qlabel, ExplainState *es); static void show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es); +static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors, + ExplainState *es); +static void show_sort_keys_common(PlanState *planstate, + int nkeys, AttrNumber *keycols, + List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); @@ -647,6 +652,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_Append: pname = sname = "Append"; break; + case T_MergeAppend: + pname = sname = "Merge Append"; + break; case T_RecursiveUnion: pname = sname = "Recursive Union"; break; @@ -1074,6 +1082,10 @@ ExplainNode(PlanState *planstate, List *ancestors, show_sort_keys((SortState *) planstate, ancestors, es); show_sort_info((SortState *) planstate, es); break; + case T_MergeAppend: + show_merge_append_keys((MergeAppendState *) planstate, + ancestors, es); + break; case T_Result: show_upper_qual((List *) ((Result *) plan)->resconstantqual, "One-Time Filter", planstate, ancestors, es); @@ -1170,6 +1182,7 @@ ExplainNode(PlanState *planstate, List *ancestors, innerPlanState(planstate) || IsA(plan, ModifyTable) || IsA(plan, Append) || + IsA(plan, MergeAppend) || IsA(plan, BitmapAnd) || IsA(plan, BitmapOr) || IsA(plan, SubqueryScan) || @@ -1208,6 +1221,11 @@ ExplainNode(PlanState *planstate, List *ancestors, ((AppendState *) planstate)->appendplans, ancestors, es); break; + case T_MergeAppend: + ExplainMemberNodes(((MergeAppend *) plan)->mergeplans, + ((MergeAppendState *) planstate)->mergeplans, + ancestors, es); + break; case T_BitmapAnd: ExplainMemberNodes(((BitmapAnd *) plan)->bitmapplans, ((BitmapAndState *) planstate)->bitmapplans, @@ -1265,7 +1283,9 @@ show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es) /* The tlist of an Append isn't real helpful, so suppress it */ if (IsA(plan, Append)) return; - /* Likewise for RecursiveUnion */ + /* Likewise for MergeAppend and RecursiveUnion */ + if (IsA(plan, MergeAppend)) + return; if (IsA(plan, RecursiveUnion)) return; @@ -1369,8 +1389,31 @@ static void show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) { Sort *plan = (Sort *) sortstate->ss.ps.plan; - int nkeys = plan->numCols; - AttrNumber *keycols = plan->sortColIdx; + + show_sort_keys_common((PlanState *) sortstate, + plan->numCols, plan->sortColIdx, + ancestors, es); +} + +/* + * Likewise, for a MergeAppend node. + */ +static void +show_merge_append_keys(MergeAppendState *mstate, List *ancestors, + ExplainState *es) +{ + MergeAppend *plan = (MergeAppend *) mstate->ps.plan; + + show_sort_keys_common((PlanState *) mstate, + plan->numCols, plan->sortColIdx, + ancestors, es); +} + +static void +show_sort_keys_common(PlanState *planstate, int nkeys, AttrNumber *keycols, + List *ancestors, ExplainState *es) +{ + Plan *plan = planstate->plan; List *context; List *result = NIL; bool useprefix; @@ -1381,7 +1424,7 @@ show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) return; /* Set up deparsing context */ - context = deparse_context_for_planstate((Node *) sortstate, + context = deparse_context_for_planstate((Node *) planstate, ancestors, es->rtable); useprefix = (list_length(es->rtable) > 1 || es->verbose); @@ -1390,7 +1433,7 @@ show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) { /* find key expression in tlist */ AttrNumber keyresno = keycols[keyno]; - TargetEntry *target = get_tle_by_resno(plan->plan.targetlist, + TargetEntry *target = get_tle_by_resno(plan->targetlist, keyresno); if (!target) @@ -1603,8 +1646,8 @@ ExplainScanTarget(Scan *plan, ExplainState *es) } /* - * Explain the constituent plans of a ModifyTable, Append, BitmapAnd, - * or BitmapOr node. + * Explain the constituent plans of a ModifyTable, Append, MergeAppend, + * BitmapAnd, or BitmapOr node. * * The ancestors list should already contain the immediate parent of these * plans. diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index f260bcfd648..da4fbb440bc 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -18,7 +18,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ nodeHashjoin.o nodeIndexscan.o nodeLimit.o nodeLockRows.o \ - nodeMaterial.o nodeMergejoin.o nodeModifyTable.o \ + nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 20a50192e0f..d999a4481d2 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -30,6 +30,7 @@ #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" #include "executor/nodeMaterial.h" +#include "executor/nodeMergeAppend.h" #include "executor/nodeMergejoin.h" #include "executor/nodeModifyTable.h" #include "executor/nodeNestloop.h" @@ -129,6 +130,10 @@ ExecReScan(PlanState *node) ExecReScanAppend((AppendState *) node); break; + case T_MergeAppendState: + ExecReScanMergeAppend((MergeAppendState *) node); + break; + case T_RecursiveUnionState: ExecReScanRecursiveUnion((RecursiveUnionState *) node); break; diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index 79a1425a974..cf552a016a5 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -296,6 +296,29 @@ search_plan_tree(PlanState *node, Oid table_oid) } /* + * Similarly for MergeAppend + */ + case T_MergeAppendState: + { + MergeAppendState *mstate = (MergeAppendState *) node; + ScanState *result = NULL; + int i; + + for (i = 0; i < mstate->ms_nplans; i++) + { + ScanState *elem = search_plan_tree(mstate->mergeplans[i], + table_oid); + + if (!elem) + continue; + if (result) + return NULL; /* multiple matches */ + result = elem; + } + return result; + } + + /* * Result and Limit can be descended through (these are safe * because they always return their input's current row) */ diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 64cb701e416..edd175e1299 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -93,6 +93,7 @@ #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" #include "executor/nodeMaterial.h" +#include "executor/nodeMergeAppend.h" #include "executor/nodeMergejoin.h" #include "executor/nodeModifyTable.h" #include "executor/nodeNestloop.h" @@ -158,6 +159,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_MergeAppend: + result = (PlanState *) ExecInitMergeAppend((MergeAppend *) node, + estate, eflags); + break; + case T_RecursiveUnion: result = (PlanState *) ExecInitRecursiveUnion((RecursiveUnion *) node, estate, eflags); @@ -363,6 +369,10 @@ ExecProcNode(PlanState *node) result = ExecAppend((AppendState *) node); break; + case T_MergeAppendState: + result = ExecMergeAppend((MergeAppendState *) node); + break; + case T_RecursiveUnionState: result = ExecRecursiveUnion((RecursiveUnionState *) node); break; @@ -581,6 +591,10 @@ ExecEndNode(PlanState *node) ExecEndAppend((AppendState *) node); break; + case T_MergeAppendState: + ExecEndMergeAppend((MergeAppendState *) node); + break; + case T_RecursiveUnionState: ExecEndRecursiveUnion((RecursiveUnionState *) node); break; diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c new file mode 100644 index 00000000000..9ded25d8fcf --- /dev/null +++ b/src/backend/executor/nodeMergeAppend.c @@ -0,0 +1,392 @@ +/*------------------------------------------------------------------------- + * + * nodeMergeAppend.c + * routines to handle MergeAppend nodes. + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeMergeAppend.c + * + *------------------------------------------------------------------------- + */ +/* INTERFACE ROUTINES + * ExecInitMergeAppend - initialize the MergeAppend node + * ExecMergeAppend - retrieve the next tuple from the node + * ExecEndMergeAppend - shut down the MergeAppend node + * ExecReScanMergeAppend - rescan the MergeAppend node + * + * NOTES + * A MergeAppend node contains a list of one or more subplans. + * These are each expected to deliver tuples that are sorted according + * to a common sort key. The MergeAppend node merges these streams + * to produce output sorted the same way. + * + * MergeAppend nodes don't make use of their left and right + * subtrees, rather they maintain a list of subplans so + * a typical MergeAppend node looks like this in the plan tree: + * + * ... + * / + * MergeAppend---+------+------+--- nil + * / \ | | | + * nil nil ... ... ... + * subplans + */ + +#include "postgres.h" + +#include "access/nbtree.h" +#include "executor/execdebug.h" +#include "executor/nodeMergeAppend.h" +#include "utils/lsyscache.h" + +/* + * It gets quite confusing having a heap array (indexed by integers) which + * contains integers which index into the slots array. These typedefs try to + * clear it up, but they're only documentation. + */ +typedef int SlotNumber; +typedef int HeapPosition; + +static void heap_insert_slot(MergeAppendState *node, SlotNumber new_slot); +static void heap_siftup_slot(MergeAppendState *node); +static int32 heap_compare_slots(MergeAppendState *node, SlotNumber slot1, SlotNumber slot2); + + +/* ---------------------------------------------------------------- + * ExecInitMergeAppend + * + * Begin all of the subscans of the MergeAppend node. + * ---------------------------------------------------------------- + */ +MergeAppendState * +ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) +{ + MergeAppendState *mergestate = makeNode(MergeAppendState); + PlanState **mergeplanstates; + int nplans; + int i; + ListCell *lc; + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + /* + * Set up empty vector of subplan states + */ + nplans = list_length(node->mergeplans); + + mergeplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *)); + + /* + * create new MergeAppendState for our node + */ + mergestate->ps.plan = (Plan *) node; + mergestate->ps.state = estate; + mergestate->mergeplans = mergeplanstates; + mergestate->ms_nplans = nplans; + + mergestate->ms_slots = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans); + mergestate->ms_heap = (int *) palloc0(sizeof(int) * nplans); + + /* + * Miscellaneous initialization + * + * MergeAppend plans don't have expression contexts because they never + * call ExecQual or ExecProject. + */ + + /* + * MergeAppend nodes do have Result slots, which hold pointers to tuples, + * so we have to initialize them. + */ + ExecInitResultTupleSlot(estate, &mergestate->ps); + + /* + * call ExecInitNode on each of the plans to be executed and save the + * results into the array "mergeplans". + */ + i = 0; + foreach(lc, node->mergeplans) + { + Plan *initNode = (Plan *) lfirst(lc); + + mergeplanstates[i] = ExecInitNode(initNode, estate, eflags); + i++; + } + + /* + * initialize output tuple type + */ + ExecAssignResultTypeFromTL(&mergestate->ps); + mergestate->ps.ps_ProjInfo = NULL; + + /* + * initialize sort-key information + */ + mergestate->ms_nkeys = node->numCols; + mergestate->ms_scankeys = palloc0(sizeof(ScanKeyData) * node->numCols); + + for (i = 0; i < node->numCols; i++) + { + Oid sortFunction; + bool reverse; + + if (!get_compare_function_for_ordering_op(node->sortOperators[i], + &sortFunction, &reverse)) + elog(ERROR, "operator %u is not a valid ordering operator", + node->sortOperators[i]); + + /* + * We needn't fill in sk_strategy or sk_subtype since these scankeys + * will never be passed to an index. + */ + ScanKeyInit(&mergestate->ms_scankeys[i], + node->sortColIdx[i], + InvalidStrategy, + sortFunction, + (Datum) 0); + + /* However, we use btree's conventions for encoding directionality */ + if (reverse) + mergestate->ms_scankeys[i].sk_flags |= SK_BT_DESC; + if (node->nullsFirst[i]) + mergestate->ms_scankeys[i].sk_flags |= SK_BT_NULLS_FIRST; + } + + /* + * initialize to show we have not run the subplans yet + */ + mergestate->ms_heap_size = 0; + mergestate->ms_initialized = false; + mergestate->ms_last_slot = -1; + + return mergestate; +} + +/* ---------------------------------------------------------------- + * ExecMergeAppend + * + * Handles iteration over multiple subplans. + * ---------------------------------------------------------------- + */ +TupleTableSlot * +ExecMergeAppend(MergeAppendState *node) +{ + TupleTableSlot *result; + SlotNumber i; + + if (!node->ms_initialized) + { + /* + * First time through: pull the first tuple from each subplan, + * and set up the heap. + */ + for (i = 0; i < node->ms_nplans; i++) + { + node->ms_slots[i] = ExecProcNode(node->mergeplans[i]); + if (!TupIsNull(node->ms_slots[i])) + heap_insert_slot(node, i); + } + node->ms_initialized = true; + } + else + { + /* + * Otherwise, pull the next tuple from whichever subplan we returned + * from last time, and insert it into the heap. (We could simplify + * the logic a bit by doing this before returning from the prior call, + * but it's better to not pull tuples until necessary.) + */ + i = node->ms_last_slot; + node->ms_slots[i] = ExecProcNode(node->mergeplans[i]); + if (!TupIsNull(node->ms_slots[i])) + heap_insert_slot(node, i); + } + + if (node->ms_heap_size > 0) + { + /* Return the topmost heap node, and sift up the remaining nodes */ + i = node->ms_heap[0]; + result = node->ms_slots[i]; + node->ms_last_slot = i; + heap_siftup_slot(node); + } + else + { + /* All the subplans are exhausted, and so is the heap */ + result = ExecClearTuple(node->ps.ps_ResultTupleSlot); + } + + return result; +} + +/* + * Insert a new slot into the heap. The slot must contain a valid tuple. + */ +static void +heap_insert_slot(MergeAppendState *node, SlotNumber new_slot) +{ + SlotNumber *heap = node->ms_heap; + HeapPosition j; + + Assert(!TupIsNull(node->ms_slots[new_slot])); + + j = node->ms_heap_size++; /* j is where the "hole" is */ + while (j > 0) + { + int i = (j-1)/2; + + if (heap_compare_slots(node, new_slot, node->ms_heap[i]) >= 0) + break; + heap[j] = heap[i]; + j = i; + } + heap[j] = new_slot; +} + +/* + * Delete the heap top (the slot in heap[0]), and sift up. + */ +static void +heap_siftup_slot(MergeAppendState *node) +{ + SlotNumber *heap = node->ms_heap; + HeapPosition i, + n; + + if (--node->ms_heap_size <= 0) + return; + n = node->ms_heap_size; /* heap[n] needs to be reinserted */ + i = 0; /* i is where the "hole" is */ + for (;;) + { + int j = 2 * i + 1; + + if (j >= n) + break; + if (j+1 < n && heap_compare_slots(node, heap[j], heap[j+1]) > 0) + j++; + if (heap_compare_slots(node, heap[n], heap[j]) <= 0) + break; + heap[i] = heap[j]; + i = j; + } + heap[i] = heap[n]; +} + +/* + * Compare the tuples in the two given slots. + */ +static int32 +heap_compare_slots(MergeAppendState *node, SlotNumber slot1, SlotNumber slot2) +{ + TupleTableSlot *s1 = node->ms_slots[slot1]; + TupleTableSlot *s2 = node->ms_slots[slot2]; + int nkey; + + Assert(!TupIsNull(s1)); + Assert(!TupIsNull(s2)); + + for (nkey = 0; nkey < node->ms_nkeys; nkey++) + { + ScanKey scankey = node->ms_scankeys + nkey; + AttrNumber attno = scankey->sk_attno; + Datum datum1, + datum2; + bool isNull1, + isNull2; + int32 compare; + + datum1 = slot_getattr(s1, attno, &isNull1); + datum2 = slot_getattr(s2, attno, &isNull2); + + if (isNull1) + { + if (isNull2) + continue; /* NULL "=" NULL */ + else if (scankey->sk_flags & SK_BT_NULLS_FIRST) + return -1; /* NULL "<" NOT_NULL */ + else + return 1; /* NULL ">" NOT_NULL */ + } + else if (isNull2) + { + if (scankey->sk_flags & SK_BT_NULLS_FIRST) + return 1; /* NOT_NULL ">" NULL */ + else + return -1; /* NOT_NULL "<" NULL */ + } + else + { + compare = DatumGetInt32(FunctionCall2(&scankey->sk_func, + datum1, datum2)); + if (compare != 0) + { + if (scankey->sk_flags & SK_BT_DESC) + compare = -compare; + return compare; + } + } + } + return 0; +} + +/* ---------------------------------------------------------------- + * ExecEndMergeAppend + * + * Shuts down the subscans of the MergeAppend node. + * + * Returns nothing of interest. + * ---------------------------------------------------------------- + */ +void +ExecEndMergeAppend(MergeAppendState *node) +{ + PlanState **mergeplans; + int nplans; + int i; + + /* + * get information from the node + */ + mergeplans = node->mergeplans; + nplans = node->ms_nplans; + + /* + * shut down each of the subscans + */ + for (i = 0; i < nplans; i++) + ExecEndNode(mergeplans[i]); +} + +void +ExecReScanMergeAppend(MergeAppendState *node) +{ + int i; + + for (i = 0; i < node->ms_nplans; i++) + { + PlanState *subnode = node->mergeplans[i]; + + /* + * ExecReScan doesn't know about my subplans, so I have to do + * changed-parameter signaling myself. + */ + if (node->ps.chgParam != NULL) + UpdateChangedParamSet(subnode, node->ps.chgParam); + + /* + * If chgParam of subnode is not null then plan will be re-scanned by + * first ExecProcNode. + */ + if (subnode->chgParam == NULL) + ExecReScan(subnode); + } + node->ms_heap_size = 0; + node->ms_initialized = false; + node->ms_last_slot = -1; +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 2118a333a39..6ce3984bff0 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -203,6 +203,31 @@ _copyAppend(Append *from) } /* + * _copyMergeAppend + */ +static MergeAppend * +_copyMergeAppend(MergeAppend *from) +{ + MergeAppend *newnode = makeNode(MergeAppend); + + /* + * copy node superclass fields + */ + CopyPlanFields((Plan *) from, (Plan *) newnode); + + /* + * copy remainder of node + */ + COPY_NODE_FIELD(mergeplans); + COPY_SCALAR_FIELD(numCols); + COPY_POINTER_FIELD(sortColIdx, from->numCols * sizeof(AttrNumber)); + COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); + + return newnode; +} + +/* * _copyRecursiveUnion */ static RecursiveUnion * @@ -3625,6 +3650,9 @@ copyObject(void *from) case T_Append: retval = _copyAppend(from); break; + case T_MergeAppend: + retval = _copyMergeAppend(from); + break; case T_RecursiveUnion: retval = _copyRecursiveUnion(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 55665ca20e5..ee2aeb0ad30 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -345,6 +345,32 @@ _outAppend(StringInfo str, Append *node) } static void +_outMergeAppend(StringInfo str, MergeAppend *node) +{ + int i; + + WRITE_NODE_TYPE("MERGEAPPEND"); + + _outPlanInfo(str, (Plan *) node); + + WRITE_NODE_FIELD(mergeplans); + + WRITE_INT_FIELD(numCols); + + appendStringInfo(str, " :sortColIdx"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %d", node->sortColIdx[i]); + + appendStringInfo(str, " :sortOperators"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->sortOperators[i]); + + appendStringInfo(str, " :nullsFirst"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %s", booltostr(node->nullsFirst[i])); +} + +static void _outRecursiveUnion(StringInfo str, RecursiveUnion *node) { int i; @@ -1460,6 +1486,16 @@ _outAppendPath(StringInfo str, AppendPath *node) } static void +_outMergeAppendPath(StringInfo str, MergeAppendPath *node) +{ + WRITE_NODE_TYPE("MERGEAPPENDPATH"); + + _outPathInfo(str, (Path *) node); + + WRITE_NODE_FIELD(subpaths); +} + +static void _outResultPath(StringInfo str, ResultPath *node) { WRITE_NODE_TYPE("RESULTPATH"); @@ -2498,6 +2534,9 @@ _outNode(StringInfo str, void *obj) case T_Append: _outAppend(str, obj); break; + case T_MergeAppend: + _outMergeAppend(str, obj); + break; case T_RecursiveUnion: _outRecursiveUnion(str, obj); break; @@ -2745,6 +2784,9 @@ _outNode(StringInfo str, void *obj) case T_AppendPath: _outAppendPath(str, obj); break; + case T_MergeAppendPath: + _outMergeAppendPath(str, obj); + break; case T_ResultPath: _outResultPath(str, obj); break; diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index fc793ac8ae8..d6402cf9118 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -343,11 +343,12 @@ RelOptInfo - a relation or joined relations join clauses) Path - every way to generate a RelOptInfo(sequential,index,joins) - SeqScan - a plain Path node with pathtype = T_SeqScan - IndexPath - index scans + SeqScan - represents a sequential scan plan + IndexPath - index scan BitmapHeapPath - top of a bitmapped index scan TidPath - scan by CTID AppendPath - append multiple subpaths together + MergeAppendPath - merge multiple subpaths, preserving their common sort order ResultPath - a Result plan node (used for FROM-less SELECT) MaterialPath - a Material plan node UniquePath - remove duplicate rows diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b7cf0b815b7..aa9a90cbfa2 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -51,6 +51,7 @@ static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); +static List *accumulate_append_subpath(List *subpaths, Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); @@ -283,7 +284,9 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { int parentRTindex = rti; + List *live_childrels = NIL; List *subpaths = NIL; + List *all_child_pathkeys = NIL; double parent_rows; double parent_size; double *parent_attrsizes; @@ -321,7 +324,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *childrel; List *childquals; Node *childqual; - Path *childpath; + ListCell *lcp; ListCell *parentvars; ListCell *childvars; @@ -395,13 +398,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* * We have to make child entries in the EquivalenceClass data - * structures as well. + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to + * consider inner-indexscan joins on the individual children) + * or if the parent has useful pathkeys (because we should try + * to build MergeAppend paths that produce those sort orderings). */ - if (rel->has_eclass_joins) - { + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) add_child_rel_equivalences(root, appinfo, rel, childrel); - childrel->has_eclass_joins = true; - } + childrel->has_eclass_joins = rel->has_eclass_joins; /* * Note: we could compute appropriate attr_needed data for the child's @@ -411,23 +416,52 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * otherrels. So we just leave the child's attr_needed empty. */ + /* Remember which childrels are live, for MergeAppend logic below */ + live_childrels = lappend(live_childrels, childrel); + /* * Compute the child's access paths, and add the cheapest one to the * Append path we are constructing for the parent. - * - * It's possible that the child is itself an appendrel, in which case - * we can "cut out the middleman" and just add its child paths to our - * own list. (We don't try to do this earlier because we need to - * apply both levels of transformation to the quals.) */ set_rel_pathlist(root, childrel, childRTindex, childRTE); - childpath = childrel->cheapest_total_path; - if (IsA(childpath, AppendPath)) - subpaths = list_concat(subpaths, - list_copy(((AppendPath *) childpath)->subpaths)); - else - subpaths = lappend(subpaths, childpath); + subpaths = accumulate_append_subpath(subpaths, + childrel->cheapest_total_path); + + /* + * Collect a list of all the available path orderings for all the + * children. We use this as a heuristic to indicate which sort + * orderings we should build MergeAppend paths for. + */ + foreach(lcp, childrel->pathlist) + { + Path *childpath = (Path *) lfirst(lcp); + List *childkeys = childpath->pathkeys; + ListCell *lpk; + bool found = false; + + /* Ignore unsorted paths */ + if (childkeys == NIL) + continue; + + /* Have we already seen this ordering? */ + foreach(lpk, all_child_pathkeys) + { + List *existing_pathkeys = (List *) lfirst(lpk); + + if (compare_pathkeys(existing_pathkeys, + childkeys) == PATHKEYS_EQUAL) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_pathkeys */ + all_child_pathkeys = lappend(all_child_pathkeys, childkeys); + } + } /* * Accumulate size information from each child. @@ -483,17 +517,107 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, pfree(parent_attrsizes); /* - * Finally, build Append path and install it as the only access path for - * the parent rel. (Note: this is correct even if we have zero or one - * live subpath due to constraint exclusion.) + * Next, build an unordered Append path for the rel. (Note: this is + * correct even if we have zero or one live subpath due to constraint + * exclusion.) */ add_path(rel, (Path *) create_append_path(rel, subpaths)); - /* Select cheapest path (pretty easy in this case...) */ + /* + * Next, build MergeAppend paths based on the collected list of child + * pathkeys. We consider both cheapest-startup and cheapest-total + * cases, ie, for each interesting ordering, collect all the cheapest + * startup subpaths and all the cheapest total paths, and build a + * MergeAppend path for each list. + */ + foreach(l, all_child_pathkeys) + { + List *pathkeys = (List *) lfirst(l); + List *startup_subpaths = NIL; + List *total_subpaths = NIL; + bool startup_neq_total = false; + ListCell *lcr; + + /* Select the child paths for this ordering... */ + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_startup, + *cheapest_total; + + /* Locate the right paths, if they are available. */ + cheapest_startup = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + STARTUP_COST); + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + TOTAL_COST); + + /* + * If we can't find any paths with the right order just add the + * cheapest-total path; we'll have to sort it. + */ + if (cheapest_startup == NULL) + cheapest_startup = childrel->cheapest_total_path; + if (cheapest_total == NULL) + cheapest_total = childrel->cheapest_total_path; + + /* + * Notice whether we actually have different paths for the + * "cheapest" and "total" cases; frequently there will be no + * point in two create_merge_append_path() calls. + */ + if (cheapest_startup != cheapest_total) + startup_neq_total = true; + + startup_subpaths = + accumulate_append_subpath(startup_subpaths, cheapest_startup); + total_subpaths = + accumulate_append_subpath(total_subpaths, cheapest_total); + } + + /* ... and build the MergeAppend paths */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, + pathkeys)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys)); + } + + /* Select cheapest path */ set_cheapest(rel); } /* + * accumulate_append_subpath + * Add a subpath to the list being built for an Append or MergeAppend + * + * It's possible that the child is itself an Append path, in which case + * we can "cut out the middleman" and just add its child paths to our + * own list. (We don't try to do this earlier because we need to + * apply both levels of transformation to the quals.) + */ +static List * +accumulate_append_subpath(List *subpaths, Path *path) +{ + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + /* list_copy is important here to avoid sharing list substructure */ + return list_concat(subpaths, list_copy(apath->subpaths)); + } + else + return lappend(subpaths, path); +} + +/* * set_dummy_rel_pathlist * Build a dummy path for a relation that's been excluded by constraints * @@ -1385,6 +1509,9 @@ print_path(PlannerInfo *root, Path *path, int indent) case T_AppendPath: ptype = "Append"; break; + case T_MergeAppendPath: + ptype = "MergeAppend"; + break; case T_ResultPath: ptype = "Result"; break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b27dc53feff..067cbca1254 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1210,6 +1210,70 @@ cost_sort(Path *path, PlannerInfo *root, } /* + * cost_merge_append + * Determines and returns the cost of a MergeAppend node. + * + * MergeAppend merges several pre-sorted input streams, using a heap that + * at any given instant holds the next tuple from each stream. If there + * are N streams, we need about N*log2(N) tuple comparisons to construct + * the heap at startup, and then for each output tuple, about log2(N) + * comparisons to delete the top heap entry and another log2(N) comparisons + * to insert its successor from the same stream. + * + * (The effective value of N will drop once some of the input streams are + * exhausted, but it seems unlikely to be worth trying to account for that.) + * + * The heap is never spilled to disk, since we assume N is not very large. + * So this is much simpler than cost_sort. + * + * As in cost_sort, we charge two operator evals per tuple comparison. + * + * 'pathkeys' is a list of sort keys + * 'n_streams' is the number of input streams + * 'input_startup_cost' is the sum of the input streams' startup costs + * 'input_total_cost' is the sum of the input streams' total costs + * 'tuples' is the number of tuples in all the streams + */ +void +cost_merge_append(Path *path, PlannerInfo *root, + List *pathkeys, int n_streams, + Cost input_startup_cost, Cost input_total_cost, + double tuples) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + Cost comparison_cost; + double N; + double logN; + + /* + * Avoid log(0)... + */ + N = (n_streams < 2) ? 2.0 : (double) n_streams; + logN = LOG2(N); + + /* Assumed cost per tuple comparison */ + comparison_cost = 2.0 * cpu_operator_cost; + + /* Heap creation cost */ + startup_cost += comparison_cost * N * logN; + + /* Per-tuple heap maintenance cost */ + run_cost += tuples * comparison_cost * 2.0 * logN; + + /* + * Also charge a small amount (arbitrarily set equal to operator cost) per + * extracted tuple. We don't charge cpu_tuple_cost because a MergeAppend + * node doesn't do qual-checking or projection, so it has less overhead + * than most plan nodes. + */ + run_cost += cpu_operator_cost * tuples; + + path->startup_cost = startup_cost + input_startup_cost; + path->total_cost = startup_cost + run_cost + input_total_cost; +} + +/* * cost_material * Determines and returns the cost of materializing a relation, including * the cost of reading the input data. @@ -1405,7 +1469,9 @@ cost_group(Path *path, PlannerInfo *root, * output row count, which may be lower than the restriction-clause-only row * count of its parent. (We don't include this case in the PATH_ROWS macro * because it applies *only* to a nestloop's inner relation.) We have to - * be prepared to recurse through Append nodes in case of an appendrel. + * be prepared to recurse through Append or MergeAppend nodes in case of an + * appendrel. (It's not clear MergeAppend can be seen here, but we may as + * well handle it if so.) */ static double nestloop_inner_path_rows(Path *path) @@ -1426,6 +1492,16 @@ nestloop_inner_path_rows(Path *path) result += nestloop_inner_path_rows((Path *) lfirst(l)); } } + else if (IsA(path, MergeAppendPath)) + { + ListCell *l; + + result = 0; + foreach(l, ((MergeAppendPath *) path)->subpaths) + { + result += nestloop_inner_path_rows((Path *) lfirst(l)); + } + } else result = PATH_ROWS(path); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index a20ed5f36c7..e44e960b545 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1611,8 +1611,8 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) * Search for EC members that reference (only) the parent_rel, and * add transformed members referencing the child_rel. * - * We only need to do this for ECs that could generate join conditions, - * since the child members are only used for creating inner-indexscan paths. + * Note that this function won't be called at all unless we have at least some + * reason to believe that the EC members it generates will be useful. * * parent_rel and child_rel could be derived from appinfo, but since the * caller has already computed them, we might as well just pass them in. @@ -1631,10 +1631,14 @@ add_child_rel_equivalences(PlannerInfo *root, ListCell *lc2; /* - * Won't generate joinclauses if const or single-member (the latter - * test covers the volatile case too) + * If this EC contains a constant, then it's not useful for sorting + * or driving an inner index-scan, so we skip generating child EMs. + * + * If this EC contains a volatile expression, then generating child + * EMs would be downright dangerous. We rely on a volatile EC having + * only one EM. */ - if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + if (cur_ec->ec_has_const || cur_ec->ec_has_volatile) continue; /* No point in searching if parent rel not mentioned in eclass */ diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 2c398d2eed9..52dd27b1583 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -25,6 +25,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" +#include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/planmain.h" #include "optimizer/predtest.h" @@ -45,6 +46,7 @@ static void disuse_physical_tlist(Plan *plan, Path *path); static Plan *create_gating_plan(PlannerInfo *root, Plan *plan, List *quals); static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path); static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path); +static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path); static Result *create_result_plan(PlannerInfo *root, ResultPath *best_path); static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path); static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path); @@ -134,6 +136,13 @@ static MergeJoin *make_mergejoin(List *tlist, static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst, double limit_tuples); +static Plan *prepare_sort_from_pathkeys(PlannerInfo *root, + Plan *lefttree, List *pathkeys, + bool adjust_tlist_in_place, + int *p_numsortkeys, + AttrNumber **p_sortColIdx, + Oid **p_sortOperators, + bool **p_nullsFirst); static Material *make_material(Plan *lefttree); @@ -203,6 +212,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path) plan = create_append_plan(root, (AppendPath *) best_path); break; + case T_MergeAppend: + plan = create_merge_append_plan(root, + (MergeAppendPath *) best_path); + break; case T_Result: plan = (Plan *) create_result_plan(root, (ResultPath *) best_path); @@ -620,6 +633,97 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) } /* + * create_merge_append_plan + * Create a MergeAppend plan for 'best_path' and (recursively) plans + * for its subpaths. + * + * Returns a Plan node. + */ +static Plan * +create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) +{ + MergeAppend *node = makeNode(MergeAppend); + Plan *plan = &node->plan; + List *tlist = build_relation_tlist(best_path->path.parent); + List *pathkeys = best_path->path.pathkeys; + List *subplans = NIL; + ListCell *subpaths; + + /* + * We don't have the actual creation of the MergeAppend node split out + * into a separate make_xxx function. This is because we want to run + * prepare_sort_from_pathkeys on it before we do so on the individual + * child plans, to make cross-checking the sort info easier. + */ + copy_path_costsize(plan, (Path *) best_path); + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = NULL; + plan->righttree = NULL; + + /* Compute sort column info, and adjust MergeAppend's tlist as needed */ + (void) prepare_sort_from_pathkeys(root, plan, pathkeys, + true, + &node->numCols, + &node->sortColIdx, + &node->sortOperators, + &node->nullsFirst); + + /* + * Now prepare the child plans. We must apply prepare_sort_from_pathkeys + * even to subplans that don't need an explicit sort, to make sure they + * are returning the same sort key columns the MergeAppend expects. + */ + foreach(subpaths, best_path->subpaths) + { + Path *subpath = (Path *) lfirst(subpaths); + Plan *subplan; + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + bool *nullsFirst; + + /* Build the child plan */ + subplan = create_plan_recurse(root, subpath); + + /* Compute sort column info, and adjust subplan's tlist as needed */ + subplan = prepare_sort_from_pathkeys(root, subplan, pathkeys, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &nullsFirst); + + /* + * Check that we got the same sort key information. We just Assert + * that the sortops match, since those depend only on the pathkeys; + * but it seems like a good idea to check the sort column numbers + * explicitly, to ensure the tlists really do match up. + */ + Assert(numsortkeys == node->numCols); + if (memcmp(sortColIdx, node->sortColIdx, + numsortkeys * sizeof(AttrNumber)) != 0) + elog(ERROR, "MergeAppend child's targetlist doesn't match MergeAppend"); + Assert(memcmp(sortOperators, node->sortOperators, + numsortkeys * sizeof(Oid)) == 0); + Assert(memcmp(nullsFirst, node->nullsFirst, + numsortkeys * sizeof(bool)) == 0); + + /* Now, insert a Sort node if subplan isn't sufficiently ordered */ + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + subplan = (Plan *) make_sort(root, subplan, numsortkeys, + sortColIdx, sortOperators, nullsFirst, + -1.0); + + subplans = lappend(subplans, subplan); + } + + node->mergeplans = subplans; + + return (Plan *) node; +} + +/* * create_result_plan * Create a Result plan for 'best_path'. * This is only used for the case of a query with an empty jointree. @@ -2502,7 +2606,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) /* * Copy cost and size info from a Path node to the Plan node created from it. - * The executor won't use this info, but it's needed by EXPLAIN. + * The executor usually won't use this info, but it's needed by EXPLAIN. */ static void copy_path_costsize(Plan *dest, Path *src) @@ -2525,9 +2629,7 @@ copy_path_costsize(Plan *dest, Path *src) /* * Copy cost and size info from a lower plan node to an inserted node. - * This is not critical, since the decisions have already been made, - * but it helps produce more reasonable-looking EXPLAIN output. - * (Some callers alter the info after copying it.) + * (Most callers alter the info after copying it.) */ static void copy_plan_costsize(Plan *dest, Plan *src) @@ -2796,6 +2898,12 @@ make_append(List *appendplans, List *tlist) * Compute cost as sum of subplan costs. We charge nothing extra for the * Append itself, which perhaps is too optimistic, but since it doesn't do * any selection or projection, it is a pretty cheap node. + * + * If you change this, see also create_append_path(). Also, the size + * calculations should match set_append_rel_pathlist(). It'd be better + * not to duplicate all this logic, but some callers of this function + * aren't working from an appendrel or AppendPath, so there's noplace + * to copy the data from. */ plan->startup_cost = 0; plan->total_cost = 0; @@ -3102,26 +3210,41 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, } /* - * make_sort_from_pathkeys - * Create sort plan to sort according to given pathkeys + * prepare_sort_from_pathkeys + * Prepare to sort according to given pathkeys + * + * This is used to set up for both Sort and MergeAppend nodes. It calculates + * the executor's representation of the sort key information, and adjusts the + * plan targetlist if needed to add resjunk sort columns. * + * Input parameters: * 'lefttree' is the node which yields input tuples * 'pathkeys' is the list of pathkeys by which the result is to be sorted - * 'limit_tuples' is the bound on the number of output tuples; - * -1 if no bound + * 'adjust_tlist_in_place' is TRUE if lefttree must be modified in-place * * We must convert the pathkey information into arrays of sort key column - * numbers and sort operator OIDs. + * numbers and sort operator OIDs, which is the representation the executor + * wants. These are returned into the output parameters *p_numsortkeys etc. * * If the pathkeys include expressions that aren't simple Vars, we will * usually need to add resjunk items to the input plan's targetlist to - * compute these expressions (since the Sort node itself won't do it). - * If the input plan type isn't one that can do projections, this means - * adding a Result node just to do the projection. + * compute these expressions, since the Sort/MergeAppend node itself won't + * do any such calculations. If the input plan type isn't one that can do + * projections, this means adding a Result node just to do the projection. + * However, the caller can pass adjust_tlist_in_place = TRUE to force the + * lefttree tlist to be modified in-place regardless of whether the node type + * can project --- we use this for fixing the tlist of MergeAppend itself. + * + * Returns the node which is to be the input to the Sort (either lefttree, + * or a Result stacked atop lefttree). */ -Sort * -make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, - double limit_tuples) +static Plan * +prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, + bool adjust_tlist_in_place, + int *p_numsortkeys, + AttrNumber **p_sortColIdx, + Oid **p_sortOperators, + bool **p_nullsFirst) { List *tlist = lefttree->targetlist; ListCell *i; @@ -3185,7 +3308,12 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, { EquivalenceMember *em = (EquivalenceMember *) lfirst(j); - if (em->em_is_const || em->em_is_child) + /* + * We shouldn't be trying to sort by an equivalence class that + * contains a constant, so no need to consider such cases any + * further. + */ + if (em->em_is_const) continue; tle = tlist_member((Node *) em->em_expr, tlist); @@ -3221,7 +3349,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, List *exprvars; ListCell *k; - if (em->em_is_const || em->em_is_child) + if (em->em_is_const) continue; sortexpr = em->em_expr; exprvars = pull_var_clause((Node *) sortexpr, @@ -3244,7 +3372,8 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, /* * Do we need to insert a Result node? */ - if (!is_projection_capable_plan(lefttree)) + if (!adjust_tlist_in_place && + !is_projection_capable_plan(lefttree)) { /* copy needed so we don't modify input's tlist below */ tlist = copyObject(tlist); @@ -3252,6 +3381,9 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, lefttree); } + /* Don't bother testing is_projection_capable_plan again */ + adjust_tlist_in_place = true; + /* * Add resjunk entry to input's tlist */ @@ -3292,6 +3424,42 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, Assert(numsortkeys > 0); + /* Return results */ + *p_numsortkeys = numsortkeys; + *p_sortColIdx = sortColIdx; + *p_sortOperators = sortOperators; + *p_nullsFirst = nullsFirst; + + return lefttree; +} + +/* + * make_sort_from_pathkeys + * Create sort plan to sort according to given pathkeys + * + * 'lefttree' is the node which yields input tuples + * 'pathkeys' is the list of pathkeys by which the result is to be sorted + * 'limit_tuples' is the bound on the number of output tuples; + * -1 if no bound + */ +Sort * +make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, + double limit_tuples) +{ + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + bool *nullsFirst; + + /* Compute sort column info, and adjust lefttree as needed */ + lefttree = prepare_sort_from_pathkeys(root, lefttree, pathkeys, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &nullsFirst); + + /* Now build the Sort node */ return make_sort(root, lefttree, numsortkeys, sortColIdx, sortOperators, nullsFirst, limit_tuples); } @@ -3998,6 +4166,7 @@ is_projection_capable_plan(Plan *plan) case T_Limit: case T_ModifyTable: case T_Append: + case T_MergeAppend: case T_RecursiveUnion: return false; default: diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index d5e9212f6a2..9aef7fc35a2 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -554,6 +554,24 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset) } } break; + case T_MergeAppend: + { + MergeAppend *splan = (MergeAppend *) plan; + + /* + * MergeAppend, like Sort et al, doesn't actually evaluate its + * targetlist or check quals. + */ + set_dummy_tlist_references(plan, rtoffset); + Assert(splan->plan.qual == NIL); + foreach(l, splan->mergeplans) + { + lfirst(l) = set_plan_refs(glob, + (Plan *) lfirst(l), + rtoffset); + } + } + break; case T_RecursiveUnion: /* This doesn't evaluate targetlist or check quals either */ set_dummy_tlist_references(plan, rtoffset); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ab62e2a2342..137e2ce0491 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2065,6 +2065,22 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, } break; + case T_MergeAppend: + { + ListCell *l; + + foreach(l, ((MergeAppend *) plan)->mergeplans) + { + context.paramids = + bms_add_members(context.paramids, + finalize_plan(root, + (Plan *) lfirst(l), + valid_params, + scan_params)); + } + } + break; + case T_BitmapAnd: { ListCell *l; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 71e0e75a569..8214acc5713 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -636,6 +636,8 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals) * create_append_path * Creates a path corresponding to an Append plan, returning the * pathnode. + * + * Note that we must handle subpaths = NIL, representing a dummy access path. */ AppendPath * create_append_path(RelOptInfo *rel, List *subpaths) @@ -649,6 +651,13 @@ create_append_path(RelOptInfo *rel, List *subpaths) * unsorted */ pathnode->subpaths = subpaths; + /* + * Compute cost as sum of subplan costs. We charge nothing extra for the + * Append itself, which perhaps is too optimistic, but since it doesn't do + * any selection or projection, it is a pretty cheap node. + * + * If you change this, see also make_append(). + */ pathnode->path.startup_cost = 0; pathnode->path.total_cost = 0; foreach(l, subpaths) @@ -664,6 +673,68 @@ create_append_path(RelOptInfo *rel, List *subpaths) } /* + * create_merge_append_path + * Creates a path corresponding to a MergeAppend plan, returning the + * pathnode. + */ +MergeAppendPath * +create_merge_append_path(PlannerInfo *root, + RelOptInfo *rel, + List *subpaths, + List *pathkeys) +{ + MergeAppendPath *pathnode = makeNode(MergeAppendPath); + Cost input_startup_cost; + Cost input_total_cost; + ListCell *l; + + pathnode->path.pathtype = T_MergeAppend; + pathnode->path.parent = rel; + pathnode->path.pathkeys = pathkeys; + pathnode->subpaths = subpaths; + + /* Add up all the costs of the input paths */ + input_startup_cost = 0; + input_total_cost = 0; + foreach(l, subpaths) + { + Path *subpath = (Path *) lfirst(l); + + if (pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + /* Subpath is adequately ordered, we won't need to sort it */ + input_startup_cost += subpath->startup_cost; + input_total_cost += subpath->total_cost; + } + else + { + /* We'll need to insert a Sort node, so include cost for that */ + Path sort_path; /* dummy for result of cost_sort */ + + cost_sort(&sort_path, + root, + pathkeys, + subpath->total_cost, + subpath->parent->tuples, + subpath->parent->width, + 0.0, + work_mem, + -1.0); + input_startup_cost += sort_path.startup_cost; + input_total_cost += sort_path.total_cost; + } + } + + /* Now we can compute total costs of the MergeAppend */ + cost_merge_append(&pathnode->path, root, + pathkeys, list_length(subpaths), + input_startup_cost, input_total_cost, + rel->tuples); + + return pathnode; +} + +/* * create_result_path * Creates a path representing a Result-and-nothing-else plan. * This is only used for the case of a query with an empty jointree. diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index f1c1d04ee09..b5437612a92 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -2174,15 +2174,17 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) dpns->planstate = ps; /* - * We special-case Append to pretend that the first child plan is the - * OUTER referent; we have to interpret OUTER Vars in the Append's tlist - * according to one of the children, and the first one is the most natural - * choice. Likewise special-case ModifyTable to pretend that the first - * child plan is the OUTER referent; this is to support RETURNING lists - * containing references to non-target relations. + * We special-case Append and MergeAppend to pretend that the first child + * plan is the OUTER referent; we have to interpret OUTER Vars in their + * tlists according to one of the children, and the first one is the most + * natural choice. Likewise special-case ModifyTable to pretend that the + * first child plan is the OUTER referent; this is to support RETURNING + * lists containing references to non-target relations. */ if (IsA(ps, AppendState)) dpns->outer_planstate = ((AppendState *) ps)->appendplans[0]; + else if (IsA(ps, MergeAppendState)) + dpns->outer_planstate = ((MergeAppendState *) ps)->mergeplans[0]; else if (IsA(ps, ModifyTableState)) dpns->outer_planstate = ((ModifyTableState *) ps)->mt_plans[0]; else diff --git a/src/include/executor/nodeMergeAppend.h b/src/include/executor/nodeMergeAppend.h new file mode 100644 index 00000000000..7e7e494b71a --- /dev/null +++ b/src/include/executor/nodeMergeAppend.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * nodeMergeAppend.h + * + * + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeMergeAppend.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODEMERGEAPPEND_H +#define NODEMERGEAPPEND_H + +#include "nodes/execnodes.h" + +extern MergeAppendState *ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags); +extern TupleTableSlot *ExecMergeAppend(MergeAppendState *node); +extern void ExecEndMergeAppend(MergeAppendState *node); +extern void ExecReScanMergeAppend(MergeAppendState *node); + +#endif /* NODEMERGEAPPEND_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 8a1f2ee047b..78188a3954d 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1050,6 +1050,33 @@ typedef struct AppendState } AppendState; /* ---------------- + * MergeAppendState information + * + * nplans how many plans are in the array + * nkeys number of sort key columns + * scankeys sort keys in ScanKey representation + * slots current output tuple of each subplan + * heap heap of active tuples (represented as array indexes) + * heap_size number of active heap entries + * initialized true if we have fetched first tuple from each subplan + * last_slot last subplan fetched from (which must be re-called) + * ---------------- + */ +typedef struct MergeAppendState +{ + PlanState ps; /* its first field is NodeTag */ + PlanState **mergeplans; /* array of PlanStates for my inputs */ + int ms_nplans; + int ms_nkeys; + ScanKey ms_scankeys; /* array of length ms_nkeys */ + TupleTableSlot **ms_slots; /* array of length ms_nplans */ + int *ms_heap; /* array of length ms_nplans */ + int ms_heap_size; /* current active length of ms_heap[] */ + bool ms_initialized; /* are subplans started? */ + int ms_last_slot; /* last subplan slot we returned from */ +} MergeAppendState; + +/* ---------------- * RecursiveUnionState information * * RecursiveUnionState is used for performing a recursive union. diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 0d33a2ed5ff..15dabe31ce3 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -45,6 +45,7 @@ typedef enum NodeTag T_Result, T_ModifyTable, T_Append, + T_MergeAppend, T_RecursiveUnion, T_BitmapAnd, T_BitmapOr, @@ -87,6 +88,7 @@ typedef enum NodeTag T_ResultState, T_ModifyTableState, T_AppendState, + T_MergeAppendState, T_RecursiveUnionState, T_BitmapAndState, T_BitmapOrState, @@ -215,6 +217,7 @@ typedef enum NodeTag T_HashPath, T_TidPath, T_AppendPath, + T_MergeAppendPath, T_ResultPath, T_MaterialPath, T_UniquePath, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index f2f99f4a2b8..81038f6ad14 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -183,6 +183,22 @@ typedef struct Append } Append; /* ---------------- + * MergeAppend node - + * Merge the results of pre-sorted sub-plans to preserve the ordering. + * ---------------- + */ +typedef struct MergeAppend +{ + Plan plan; + List *mergeplans; + /* remaining fields are just like the sort-key info in struct Sort */ + int numCols; /* number of sort-key columns */ + AttrNumber *sortColIdx; /* their indexes in the target list */ + Oid *sortOperators; /* OIDs of operators to sort them by */ + bool *nullsFirst; /* NULLS FIRST/LAST directions */ +} MergeAppend; + +/* ---------------- * RecursiveUnion node - * Generate a recursive union of two subplans. * diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 91f4c5c1c43..6e3d0f35181 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -256,9 +256,9 @@ typedef struct PlannerInfo * the entire append relation. The member RTEs are otherrels. The parent * is present in the query join tree but the members are not. The member * RTEs and otherrels are used to plan the scans of the individual tables or - * subqueries of the append set; then the parent baserel is given an Append - * plan comprising the best plans for the individual member rels. (See - * comments for AppendRelInfo for more information.) + * subqueries of the append set; then the parent baserel is given Append + * and/or MergeAppend paths comprising the best paths for the individual + * member rels. (See comments for AppendRelInfo for more information.) * * At one time we also made otherrels to represent join RTEs, for use in * handling join alias Vars. Currently this is not needed because all join @@ -542,10 +542,11 @@ typedef struct EquivalenceClass * * em_is_child signifies that this element was built by transposing a member * for an inheritance parent relation to represent the corresponding expression - * on an inheritance child. The element should be ignored for all purposes - * except constructing inner-indexscan paths for the child relation. (Other - * types of join are driven from transposed joininfo-list entries.) Note - * that the EC's ec_relids field does NOT include the child relation. + * on an inheritance child. These elements are used for constructing + * inner-indexscan paths for the child relation (other types of join are + * driven from transposed joininfo-list entries) and for constructing + * MergeAppend paths for the whole inheritance tree. Note that the EC's + * ec_relids field does NOT include the child relation. * * em_datatype is usually the same as exprType(em_expr), but can be * different when dealing with a binary-compatible opfamily; in particular @@ -756,6 +757,16 @@ typedef struct AppendPath (IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL) /* + * MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted + * results from several member plans to produce similarly-sorted output. + */ +typedef struct MergeAppendPath +{ + Path path; + List *subpaths; /* list of component Paths */ +} MergeAppendPath; + +/* * ResultPath represents use of a Result plan node to compute a variable-free * targetlist with no underlying tables (a "SELECT expressions" query). * The query could have a WHERE clause, too, represented by "quals". diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 5a4b33f2b13..e1dcd6df140 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -86,6 +86,10 @@ extern void cost_sort(Path *path, PlannerInfo *root, List *pathkeys, Cost input_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); +extern void cost_merge_append(Path *path, PlannerInfo *root, + List *pathkeys, int n_streams, + Cost input_startup_cost, Cost input_total_cost, + double tuples); extern void cost_material(Path *path, Cost input_startup_cost, Cost input_total_cost, double tuples, int width); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 5e0ebe046b3..53ebe5756b7 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -47,6 +47,10 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root, extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals); extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths); +extern MergeAppendPath *create_merge_append_path(PlannerInfo *root, + RelOptInfo *rel, + List *subpaths, + List *pathkeys); extern ResultPath *create_result_path(List *quals); extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index 6030cba1df0..581cc7d0e70 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1147,3 +1147,98 @@ DETAIL: drop cascades to table t2 drop cascades to table ts drop cascades to table t3 drop cascades to table t4 +-- +-- Test merge-append plans for inheritance trees +-- +create table matest0 (id serial primary key, name text); +NOTICE: CREATE TABLE will create implicit sequence "matest0_id_seq" for serial column "matest0.id" +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "matest0_pkey" for table "matest0" +create table matest1 (id integer primary key) inherits (matest0); +NOTICE: merging column "id" with inherited definition +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "matest1_pkey" for table "matest1" +create table matest2 (id integer primary key) inherits (matest0); +NOTICE: merging column "id" with inherited definition +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "matest2_pkey" for table "matest2" +create table matest3 (id integer primary key) inherits (matest0); +NOTICE: merging column "id" with inherited definition +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "matest3_pkey" for table "matest3" +create index matest0i on matest0 ((1-id)); +create index matest1i on matest1 ((1-id)); +-- create index matest2i on matest2 ((1-id)); -- intentionally missing +create index matest3i on matest3 ((1-id)); +insert into matest1 (name) values ('Test 1'); +insert into matest1 (name) values ('Test 2'); +insert into matest2 (name) values ('Test 3'); +insert into matest2 (name) values ('Test 4'); +insert into matest3 (name) values ('Test 5'); +insert into matest3 (name) values ('Test 6'); +set enable_indexscan = off; -- force use of seqscan/sort, so no merge +explain (verbose, costs off) select * from matest0 order by 1-id; + QUERY PLAN +--------------------------------------------------------------------------------- + Sort + Output: public.matest0.id, public.matest0.name, ((1 - public.matest0.id)) + Sort Key: ((1 - public.matest0.id)) + -> Result + Output: public.matest0.id, public.matest0.name, (1 - public.matest0.id) + -> Append + -> Seq Scan on public.matest0 + Output: public.matest0.id, public.matest0.name + -> Seq Scan on public.matest1 matest0 + Output: public.matest0.id, public.matest0.name + -> Seq Scan on public.matest2 matest0 + Output: public.matest0.id, public.matest0.name + -> Seq Scan on public.matest3 matest0 + Output: public.matest0.id, public.matest0.name +(14 rows) + +select * from matest0 order by 1-id; + id | name +----+-------- + 6 | Test 6 + 5 | Test 5 + 4 | Test 4 + 3 | Test 3 + 2 | Test 2 + 1 | Test 1 +(6 rows) + +reset enable_indexscan; +set enable_seqscan = off; -- plan with fewest seqscans should be merge +explain (verbose, costs off) select * from matest0 order by 1-id; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Result + Output: public.matest0.id, public.matest0.name, ((1 - public.matest0.id)) + -> Merge Append + Sort Key: ((1 - public.matest0.id)) + -> Index Scan using matest0i on public.matest0 + Output: public.matest0.id, public.matest0.name, (1 - public.matest0.id) + -> Index Scan using matest1i on public.matest1 matest0 + Output: public.matest0.id, public.matest0.name, (1 - public.matest0.id) + -> Sort + Output: public.matest0.id, public.matest0.name, ((1 - public.matest0.id)) + Sort Key: ((1 - public.matest0.id)) + -> Seq Scan on public.matest2 matest0 + Output: public.matest0.id, public.matest0.name, (1 - public.matest0.id) + -> Index Scan using matest3i on public.matest3 matest0 + Output: public.matest0.id, public.matest0.name, (1 - public.matest0.id) +(15 rows) + +select * from matest0 order by 1-id; + id | name +----+-------- + 6 | Test 6 + 5 | Test 5 + 4 | Test 4 + 3 | Test 3 + 2 | Test 2 + 1 | Test 1 +(6 rows) + +reset enable_seqscan; +drop table matest0 cascade; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table matest1 +drop cascades to table matest2 +drop cascades to table matest3 diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql index 435b1cd3b42..e87cf66110e 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql @@ -373,3 +373,36 @@ SELECT a.attrelid::regclass, a.attname, a.attinhcount, e.expected ORDER BY a.attrelid::regclass::name, a.attnum; DROP TABLE t1, s1 CASCADE; + +-- +-- Test merge-append plans for inheritance trees +-- + +create table matest0 (id serial primary key, name text); +create table matest1 (id integer primary key) inherits (matest0); +create table matest2 (id integer primary key) inherits (matest0); +create table matest3 (id integer primary key) inherits (matest0); + +create index matest0i on matest0 ((1-id)); +create index matest1i on matest1 ((1-id)); +-- create index matest2i on matest2 ((1-id)); -- intentionally missing +create index matest3i on matest3 ((1-id)); + +insert into matest1 (name) values ('Test 1'); +insert into matest1 (name) values ('Test 2'); +insert into matest2 (name) values ('Test 3'); +insert into matest2 (name) values ('Test 4'); +insert into matest3 (name) values ('Test 5'); +insert into matest3 (name) values ('Test 6'); + +set enable_indexscan = off; -- force use of seqscan/sort, so no merge +explain (verbose, costs off) select * from matest0 order by 1-id; +select * from matest0 order by 1-id; +reset enable_indexscan; + +set enable_seqscan = off; -- plan with fewest seqscans should be merge +explain (verbose, costs off) select * from matest0 order by 1-id; +select * from matest0 order by 1-id; +reset enable_seqscan; + +drop table matest0 cascade; |