diff options
Diffstat (limited to 'src/backend')
24 files changed, 671 insertions, 255 deletions
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index a6604c4c934..3a5e52dc383 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -26,11 +26,28 @@ Datum ginbeginscan(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - int keysz = PG_GETARG_INT32(1); - ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; + GinScanOpaque so; + + /* no order by operators allowed */ + Assert(norderbys == 0); + + scan = RelationGetIndexScan(rel, nkeys, norderbys); + + /* allocate private workspace */ + so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData)); + so->keys = NULL; + so->nkeys = 0; + so->tempCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin scan temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + initGinState(&so->ginstate, scan->indexRelation); - scan = RelationGetIndexScan(rel, keysz, scankey); + scan->opaque = so; PG_RETURN_POINTER(scan); } @@ -241,27 +258,10 @@ ginrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); - GinScanOpaque so; - - so = (GinScanOpaque) scan->opaque; - - if (so == NULL) - { - /* if called from ginbeginscan */ - so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData)); - so->tempCtx = AllocSetContextCreate(CurrentMemoryContext, - "Gin scan temporary context", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - initGinState(&so->ginstate, scan->indexRelation); - scan->opaque = so; - } - else - { - freeScanKeys(so->keys, so->nkeys); - } + /* remaining arguments are ignored */ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + freeScanKeys(so->keys, so->nkeys); so->keys = NULL; if (scankey && scan->numberOfKeys > 0) @@ -280,14 +280,11 @@ ginendscan(PG_FUNCTION_ARGS) IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); GinScanOpaque so = (GinScanOpaque) scan->opaque; - if (so != NULL) - { - freeScanKeys(so->keys, so->nkeys); + freeScanKeys(so->keys, so->nkeys); - MemoryContextDelete(so->tempCtx); + MemoryContextDelete(so->tempCtx); - pfree(so); - } + pfree(so); PG_RETURN_VOID(); } diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 21f4ea54b7d..106714511a8 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -28,10 +28,24 @@ gistbeginscan(PG_FUNCTION_ARGS) { Relation r = (Relation) PG_GETARG_POINTER(0); int nkeys = PG_GETARG_INT32(1); - ScanKey key = (ScanKey) PG_GETARG_POINTER(2); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; + GISTScanOpaque so; + + /* no order by operators allowed */ + Assert(norderbys == 0); + + scan = RelationGetIndexScan(r, nkeys, norderbys); + + /* initialize opaque data */ + so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); + so->stack = NULL; + so->tempCxt = createTempGistContext(); + so->curbuf = InvalidBuffer; + so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); + initGISTstate(so->giststate, scan->indexRelation); - scan = RelationGetIndexScan(r, nkeys, key); + scan->opaque = so; PG_RETURN_POINTER(scan); } @@ -41,33 +55,18 @@ gistrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey key = (ScanKey) PG_GETARG_POINTER(1); - GISTScanOpaque so; + /* remaining arguments are ignored */ + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; int i; - so = (GISTScanOpaque) scan->opaque; - if (so != NULL) + /* rescan an existing indexscan --- reset state */ + gistfreestack(so->stack); + so->stack = NULL; + /* drop pins on buffers -- no locks held */ + if (BufferIsValid(so->curbuf)) { - /* rescan an existing indexscan --- reset state */ - gistfreestack(so->stack); - so->stack = NULL; - /* drop pins on buffers -- no locks held */ - if (BufferIsValid(so->curbuf)) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - } - } - else - { - /* initialize opaque data */ - so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); - so->stack = NULL; - so->tempCxt = createTempGistContext(); + ReleaseBuffer(so->curbuf); so->curbuf = InvalidBuffer; - so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); - initGISTstate(so->giststate, scan->indexRelation); - - scan->opaque = so; } /* @@ -130,21 +129,16 @@ Datum gistendscan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - GISTScanOpaque so; - - so = (GISTScanOpaque) scan->opaque; - - if (so != NULL) - { - gistfreestack(so->stack); - if (so->giststate != NULL) - freeGISTstate(so->giststate); - /* drop pins on buffers -- we aren't holding any locks */ - if (BufferIsValid(so->curbuf)) - ReleaseBuffer(so->curbuf); - MemoryContextDelete(so->tempCxt); - pfree(scan->opaque); - } + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + + gistfreestack(so->stack); + if (so->giststate != NULL) + freeGISTstate(so->giststate); + /* drop pins on buffers -- we aren't holding any locks */ + if (BufferIsValid(so->curbuf)) + ReleaseBuffer(so->curbuf); + MemoryContextDelete(so->tempCxt); + pfree(so); PG_RETURN_VOID(); } diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index bb46446d713..e53ec3d5eaa 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -366,12 +366,16 @@ Datum hashbeginscan(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - int keysz = PG_GETARG_INT32(1); - ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; HashScanOpaque so; - scan = RelationGetIndexScan(rel, keysz, scankey); + /* no order by operators allowed */ + Assert(norderbys == 0); + + scan = RelationGetIndexScan(rel, nkeys, norderbys); + so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); so->hashso_bucket_valid = false; so->hashso_bucket_blkno = 0; @@ -396,26 +400,23 @@ hashrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); + /* remaining arguments are ignored */ HashScanOpaque so = (HashScanOpaque) scan->opaque; Relation rel = scan->indexRelation; - /* if we are called from beginscan, so is still NULL */ - if (so) - { - /* release any pin we still hold */ - if (BufferIsValid(so->hashso_curbuf)) - _hash_dropbuf(rel, so->hashso_curbuf); - so->hashso_curbuf = InvalidBuffer; - - /* release lock on bucket, too */ - if (so->hashso_bucket_blkno) - _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE); - so->hashso_bucket_blkno = 0; - - /* set position invalid (this will cause _hash_first call) */ - ItemPointerSetInvalid(&(so->hashso_curpos)); - ItemPointerSetInvalid(&(so->hashso_heappos)); - } + /* release any pin we still hold */ + if (BufferIsValid(so->hashso_curbuf)) + _hash_dropbuf(rel, so->hashso_curbuf); + so->hashso_curbuf = InvalidBuffer; + + /* release lock on bucket, too */ + if (so->hashso_bucket_blkno) + _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE); + so->hashso_bucket_blkno = 0; + + /* set position invalid (this will cause _hash_first call) */ + ItemPointerSetInvalid(&(so->hashso_curpos)); + ItemPointerSetInvalid(&(so->hashso_heappos)); /* Update scan key, if a new one is given */ if (scankey && scan->numberOfKeys > 0) @@ -423,8 +424,7 @@ hashrescan(PG_FUNCTION_ARGS) memmove(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData)); - if (so) - so->hashso_bucket_valid = false; + so->hashso_bucket_valid = false; } PG_RETURN_VOID(); diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index cd0212aa94d..d0eaa36b3b5 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -57,22 +57,20 @@ /* ---------------- * RelationGetIndexScan -- Create and fill an IndexScanDesc. * - * This routine creates an index scan structure and sets its contents - * up correctly. This routine calls AMrescan to set up the scan with - * the passed key. + * This routine creates an index scan structure and sets up initial + * contents for it. * * Parameters: * indexRelation -- index relation for scan. - * nkeys -- count of scan keys. - * key -- array of scan keys to restrict the index scan. + * nkeys -- count of scan keys (index qual conditions). + * norderbys -- count of index order-by operators. * * Returns: * An initialized IndexScanDesc. * ---------------- */ IndexScanDesc -RelationGetIndexScan(Relation indexRelation, - int nkeys, ScanKey key) +RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) { IndexScanDesc scan; @@ -82,15 +80,19 @@ RelationGetIndexScan(Relation indexRelation, scan->indexRelation = indexRelation; scan->xs_snapshot = SnapshotNow; /* may be set later */ scan->numberOfKeys = nkeys; + scan->numberOfOrderBys = norderbys; /* - * We allocate the key space here, but the AM is responsible for actually - * filling it from the passed key array. + * We allocate key workspace here, but it won't get filled until amrescan. */ if (nkeys > 0) scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else scan->keyData = NULL; + if (norderbys > 0) + scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys); + else + scan->orderByData = NULL; /* * During recovery we ignore killed tuples and don't bother to kill them @@ -115,11 +117,6 @@ RelationGetIndexScan(Relation indexRelation, scan->xs_next_hot = InvalidOffsetNumber; scan->xs_prev_xmax = InvalidTransactionId; - /* - * Let the AM fill in the key and any opaque data it wants. - */ - index_rescan(scan, key); - return scan; } @@ -140,6 +137,8 @@ IndexScanEnd(IndexScanDesc scan) { if (scan->keyData != NULL) pfree(scan->keyData); + if (scan->orderByData != NULL) + pfree(scan->orderByData); pfree(scan); } @@ -286,7 +285,8 @@ systable_beginscan(Relation heapRelation, } sysscan->iscan = index_beginscan(heapRelation, irel, - snapshot, nkeys, key); + snapshot, nkeys, 0); + index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; } else @@ -450,7 +450,8 @@ systable_beginscan_ordered(Relation heapRelation, } sysscan->iscan = index_beginscan(heapRelation, indexRelation, - snapshot, nkeys, key); + snapshot, nkeys, 0); + index_rescan(sysscan->iscan, key, nkeys, NULL, 0); sysscan->scan = NULL; return sysscan; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index d151ffda8c0..8c79c6149b6 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -114,7 +114,7 @@ do { \ } while(0) static IndexScanDesc index_beginscan_internal(Relation indexRelation, - int nkeys, ScanKey key); + int nkeys, int norderbys); /* ---------------------------------------------------------------- @@ -213,11 +213,11 @@ IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, - int nkeys, ScanKey key) + int nkeys, int norderbys) { IndexScanDesc scan; - scan = index_beginscan_internal(indexRelation, nkeys, key); + scan = index_beginscan_internal(indexRelation, nkeys, norderbys); /* * Save additional parameters into the scandesc. Everything else was set @@ -238,11 +238,11 @@ index_beginscan(Relation heapRelation, IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, - int nkeys, ScanKey key) + int nkeys) { IndexScanDesc scan; - scan = index_beginscan_internal(indexRelation, nkeys, key); + scan = index_beginscan_internal(indexRelation, nkeys, 0); /* * Save additional parameters into the scandesc. Everything else was set @@ -258,7 +258,7 @@ index_beginscan_bitmap(Relation indexRelation, */ static IndexScanDesc index_beginscan_internal(Relation indexRelation, - int nkeys, ScanKey key) + int nkeys, int norderbys) { IndexScanDesc scan; FmgrInfo *procedure; @@ -278,7 +278,7 @@ index_beginscan_internal(Relation indexRelation, DatumGetPointer(FunctionCall3(procedure, PointerGetDatum(indexRelation), Int32GetDatum(nkeys), - PointerGetDatum(key))); + Int32GetDatum(norderbys))); return scan; } @@ -286,23 +286,28 @@ index_beginscan_internal(Relation indexRelation, /* ---------------- * index_rescan - (re)start a scan of an index * - * The caller may specify a new set of scankeys (but the number of keys - * cannot change). To restart the scan without changing keys, pass NULL - * for the key array. - * - * Note that this is also called when first starting an indexscan; - * see RelationGetIndexScan. Keys *must* be passed in that case, - * unless scan->numberOfKeys is zero. + * During a restart, the caller may specify a new set of scankeys and/or + * orderbykeys; but the number of keys cannot differ from what index_beginscan + * was told. (Later we might relax that to "must not exceed", but currently + * the index AMs tend to assume that scan->numberOfKeys is what to believe.) + * To restart the scan without changing keys, pass NULL for the key arrays. + * (Of course, keys *must* be passed on the first call, unless + * scan->numberOfKeys is zero.) * ---------------- */ void -index_rescan(IndexScanDesc scan, ScanKey key) +index_rescan(IndexScanDesc scan, + ScanKey keys, int nkeys, + ScanKey orderbys, int norderbys) { FmgrInfo *procedure; SCAN_CHECKS; GET_SCAN_PROCEDURE(amrescan); + Assert(nkeys == scan->numberOfKeys); + Assert(norderbys == scan->numberOfOrderBys); + /* Release any held pin on a heap page */ if (BufferIsValid(scan->xs_cbuf)) { @@ -314,9 +319,12 @@ index_rescan(IndexScanDesc scan, ScanKey key) scan->kill_prior_tuple = false; /* for safety */ - FunctionCall2(procedure, + FunctionCall5(procedure, PointerGetDatum(scan), - PointerGetDatum(key)); + PointerGetDatum(keys), + Int32GetDatum(nkeys), + PointerGetDatum(orderbys), + Int32GetDatum(norderbys)); } /* ---------------- diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 46aeb9e6adb..655a40090e9 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -337,12 +337,27 @@ Datum btbeginscan(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - int keysz = PG_GETARG_INT32(1); - ScanKey scankey = (ScanKey) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(1); + int norderbys = PG_GETARG_INT32(2); IndexScanDesc scan; + BTScanOpaque so; + + /* no order by operators allowed */ + Assert(norderbys == 0); /* get the scan */ - scan = RelationGetIndexScan(rel, keysz, scankey); + scan = RelationGetIndexScan(rel, nkeys, norderbys); + + /* allocate private workspace */ + so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); + so->currPos.buf = so->markPos.buf = InvalidBuffer; + if (scan->numberOfKeys > 0) + so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); + else + so->keyData = NULL; + so->killedItems = NULL; /* until needed */ + so->numKilled = 0; + scan->opaque = so; PG_RETURN_POINTER(scan); } @@ -355,22 +370,8 @@ btrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - - if (so == NULL) /* if called from btbeginscan */ - { - so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); - so->currPos.buf = so->markPos.buf = InvalidBuffer; - if (scan->numberOfKeys > 0) - so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); - else - so->keyData = NULL; - so->killedItems = NULL; /* until needed */ - so->numKilled = 0; - scan->opaque = so; - } + /* remaining arguments are ignored */ + BTScanOpaque so = (BTScanOpaque) scan->opaque; /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index bb7cd746b1b..e1dbd6d985b 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -875,8 +875,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, if (OldIndex != NULL && !use_sort) { heapScan = NULL; - indexScan = index_beginscan(OldHeap, OldIndex, - SnapshotAny, 0, (ScanKey) NULL); + indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0); + index_rescan(indexScan, NULL, 0, NULL, 0); } else { diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a5e44c046f7..81885b4fb74 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1017,6 +1017,8 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_IndexScan: show_scan_qual(((IndexScan *) plan)->indexqualorig, "Index Cond", planstate, ancestors, es); + show_scan_qual(((IndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); break; case T_BitmapIndexScan: diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 27ea91c0140..6bac6d06236 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -4694,7 +4694,7 @@ ExecInitExpr(Expr *node, PlanState *parent) Oid righttype; Oid proc; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, false, &strategy, &lefttype, &righttype); diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 57806ca8f0f..6ad0f1e52ad 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1211,8 +1211,8 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo, retry: conflict = false; found_self = false; - index_scan = index_beginscan(heap, index, &DirtySnapshot, - index_natts, scankeys); + index_scan = index_beginscan(heap, index, &DirtySnapshot, index_natts, 0); + index_rescan(index_scan, scankeys, index_natts, NULL, 0); while ((tup = index_getnext(index_scan, ForwardScanDirection)) != NULL) diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index 97ce0dde294..573e294882c 100644 --- a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -95,7 +95,9 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node) doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys, node->biss_NumArrayKeys); if (doscan) /* reset index scan */ - index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); + index_rescan(node->biss_ScanDesc, + node->biss_ScanKeys, node->biss_NumScanKeys, + NULL, 0); } /* must provide our own instrumentation support */ @@ -147,7 +149,9 @@ ExecReScanBitmapIndexScan(BitmapIndexScanState *node) /* reset index scan */ if (node->biss_RuntimeKeysReady) - index_rescan(node->biss_ScanDesc, node->biss_ScanKeys); + index_rescan(node->biss_ScanDesc, + node->biss_ScanKeys, node->biss_NumScanKeys, + NULL, 0); } /* ---------------------------------------------------------------- @@ -256,6 +260,8 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) * Initialize index-specific scan state */ indexstate->biss_RuntimeKeysReady = false; + indexstate->biss_RuntimeKeys = NULL; + indexstate->biss_NumRuntimeKeys = 0; /* * build the index scan keys from the index qualification @@ -264,6 +270,7 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) indexstate->biss_RelationDesc, node->scan.scanrelid, node->indexqual, + false, &indexstate->biss_ScanKeys, &indexstate->biss_NumScanKeys, &indexstate->biss_RuntimeKeys, @@ -297,8 +304,17 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) indexstate->biss_ScanDesc = index_beginscan_bitmap(indexstate->biss_RelationDesc, estate->es_snapshot, - indexstate->biss_NumScanKeys, - indexstate->biss_ScanKeys); + indexstate->biss_NumScanKeys); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to + * the index AM. + */ + if (indexstate->biss_NumRuntimeKeys == 0 && + indexstate->biss_NumArrayKeys == 0) + index_rescan(indexstate->biss_ScanDesc, + indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys, + NULL, 0); /* * all done. diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index ee5fc72c209..3aed2960d3f 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -181,7 +181,9 @@ ExecReScanIndexScan(IndexScanState *node) node->iss_RuntimeKeysReady = true; /* reset index scan */ - index_rescan(node->iss_ScanDesc, node->iss_ScanKeys); + index_rescan(node->iss_ScanDesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); ExecScanReScan(&node->ss); } @@ -480,10 +482,11 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * initialize child expressions * * Note: we don't initialize all of the indexqual expression, only the - * sub-parts corresponding to runtime keys (see below). The indexqualorig - * expression is always initialized even though it will only be used in - * some uncommon cases --- would be nice to improve that. (Problem is - * that any SubPlans present in the expression must be found now...) + * sub-parts corresponding to runtime keys (see below). Likewise for + * indexorderby, if any. But the indexqualorig expression is always + * initialized even though it will only be used in some uncommon cases --- + * would be nice to improve that. (Problem is that any SubPlans present + * in the expression must be found now...) */ indexstate->ss.ps.targetlist = (List *) ExecInitExpr((Expr *) node->scan.plan.targetlist, @@ -543,6 +546,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * Initialize index-specific scan state */ indexstate->iss_RuntimeKeysReady = false; + indexstate->iss_RuntimeKeys = NULL; + indexstate->iss_NumRuntimeKeys = 0; /* * build the index scan keys from the index qualification @@ -551,6 +556,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_RelationDesc, node->scan.scanrelid, node->indexqual, + false, &indexstate->iss_ScanKeys, &indexstate->iss_NumScanKeys, &indexstate->iss_RuntimeKeys, @@ -559,6 +565,21 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) NULL); /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->iss_RelationDesc, + node->scan.scanrelid, + node->indexorderby, + true, + &indexstate->iss_OrderByKeys, + &indexstate->iss_NumOrderByKeys, + &indexstate->iss_RuntimeKeys, + &indexstate->iss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* * If we have runtime keys, we need an ExprContext to evaluate them. The * node's standard context won't do because we want to reset that context * for every tuple. So, build another context just like the other one... @@ -584,7 +605,16 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_RelationDesc, estate->es_snapshot, indexstate->iss_NumScanKeys, - indexstate->iss_ScanKeys); + indexstate->iss_NumOrderByKeys); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to + * the index AM. + */ + if (indexstate->iss_NumRuntimeKeys == 0) + index_rescan(indexstate->iss_ScanDesc, + indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys, + indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys); /* * all done. @@ -624,12 +654,20 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * 5. NullTest ("indexkey IS NULL/IS NOT NULL"). We just fill in the * ScanKey properly. * + * This code is also used to prepare ORDER BY expressions for amcanorderbyop + * indexes. The behavior is exactly the same, except that we have to look up + * the operator differently. Note that only cases 1 and 2 are currently + * possible for ORDER BY. + * * Input params are: * * planstate: executor state node we are working for * index: the index we are building scan keys for * scanrelid: varno of the index's relation within current query - * quals: indexquals expressions + * quals: indexquals (or indexorderbys) expressions + * isorderby: true if processing ORDER BY exprs, false if processing quals + * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none + * *numRuntimeKeys: number of pre-existing runtime keys * * Output params are: * @@ -645,7 +683,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, - List *quals, ScanKey *scanKeys, int *numScanKeys, + List *quals, bool isorderby, + ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys) { @@ -654,43 +693,31 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, IndexRuntimeKeyInfo *runtime_keys; IndexArrayKeyInfo *array_keys; int n_scan_keys; - int extra_scan_keys; int n_runtime_keys; + int max_runtime_keys; int n_array_keys; int j; + /* Allocate array for ScanKey structs: one per qual */ + n_scan_keys = list_length(quals); + scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData)); + /* - * If there are any RowCompareExpr quals, we need extra ScanKey entries - * for them, and possibly extra runtime-key entries. Count up what's - * needed. (The subsidiary ScanKey arrays for the RowCompareExprs could - * be allocated as separate chunks, but we have to count anyway to make - * runtime_keys large enough, so might as well just do one palloc.) + * runtime_keys array is dynamically resized as needed. We handle it + * this way so that the same runtime keys array can be shared between + * indexquals and indexorderbys, which will be processed in separate + * calls of this function. Caller must be sure to pass in NULL/0 for + * first call. */ - n_scan_keys = list_length(quals); - extra_scan_keys = 0; - foreach(qual_cell, quals) - { - if (IsA(lfirst(qual_cell), RowCompareExpr)) - extra_scan_keys += - list_length(((RowCompareExpr *) lfirst(qual_cell))->opnos); - } - scan_keys = (ScanKey) - palloc((n_scan_keys + extra_scan_keys) * sizeof(ScanKeyData)); - /* Allocate these arrays as large as they could possibly need to be */ - runtime_keys = (IndexRuntimeKeyInfo *) - palloc((n_scan_keys + extra_scan_keys) * sizeof(IndexRuntimeKeyInfo)); + runtime_keys = *runtimeKeys; + n_runtime_keys = max_runtime_keys = *numRuntimeKeys; + + /* Allocate array_keys as large as it could possibly need to be */ array_keys = (IndexArrayKeyInfo *) palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo)); - n_runtime_keys = 0; n_array_keys = 0; /* - * Below here, extra_scan_keys is index of first cell to use for next - * RowCompareExpr - */ - extra_scan_keys = n_scan_keys; - - /* * for each opclause in the given qual, convert the opclause into a single * scan key */ @@ -742,11 +769,14 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, */ opfamily = index->rd_opfamily[varattno - 1]; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); + if (isorderby) + flags |= SK_ORDER_BY; + /* * rightop is the constant or variable comparison value */ @@ -767,6 +797,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, else { /* Need to treat this one as a runtime key */ + if (n_runtime_keys >= max_runtime_keys) + { + if (max_runtime_keys == 0) + { + max_runtime_keys = 8; + runtime_keys = (IndexRuntimeKeyInfo *) + palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + else + { + max_runtime_keys *= 2; + runtime_keys = (IndexRuntimeKeyInfo *) + repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + } runtime_keys[n_runtime_keys].scan_key = this_scan_key; runtime_keys[n_runtime_keys].key_expr = ExecInitExpr(rightop, planstate); @@ -794,12 +839,19 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, ListCell *largs_cell = list_head(rc->largs); ListCell *rargs_cell = list_head(rc->rargs); ListCell *opnos_cell = list_head(rc->opnos); - ScanKey first_sub_key = &scan_keys[extra_scan_keys]; + ScanKey first_sub_key; + int n_sub_key; + + Assert(!isorderby); + + first_sub_key = (ScanKey) + palloc(list_length(rc->opnos) * sizeof(ScanKeyData)); + n_sub_key = 0; /* Scan RowCompare columns and generate subsidiary ScanKey items */ while (opnos_cell != NULL) { - ScanKey this_sub_key = &scan_keys[extra_scan_keys]; + ScanKey this_sub_key = &first_sub_key[n_sub_key]; int flags = SK_ROW_MEMBER; Datum scanvalue; @@ -832,7 +884,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, elog(ERROR, "bogus RowCompare index qualification"); opfamily = index->rd_opfamily[varattno - 1]; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); @@ -866,6 +918,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, else { /* Need to treat this one as a runtime key */ + if (n_runtime_keys >= max_runtime_keys) + { + if (max_runtime_keys == 0) + { + max_runtime_keys = 8; + runtime_keys = (IndexRuntimeKeyInfo *) + palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + else + { + max_runtime_keys *= 2; + runtime_keys = (IndexRuntimeKeyInfo *) + repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo)); + } + } runtime_keys[n_runtime_keys].scan_key = this_sub_key; runtime_keys[n_runtime_keys].key_expr = ExecInitExpr(rightop, planstate); @@ -885,11 +952,11 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, op_righttype, /* strategy subtype */ opfuncid, /* reg proc to use */ scanvalue); /* constant */ - extra_scan_keys++; + n_sub_key++; } /* Mark the last subsidiary scankey correctly */ - scan_keys[extra_scan_keys - 1].sk_flags |= SK_ROW_END; + first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END; /* * We don't use ScanKeyEntryInitialize for the header because it @@ -907,6 +974,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, /* indexkey op ANY (array-expression) */ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Assert(!isorderby); + Assert(saop->useOr); opno = saop->opno; opfuncid = saop->opfuncid; @@ -935,7 +1004,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, */ opfamily = index->rd_opfamily[varattno - 1]; - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, isorderby, &op_strategy, &op_lefttype, &op_righttype); @@ -973,6 +1042,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, NullTest *ntest = (NullTest *) clause; int flags; + Assert(!isorderby); + /* * argument should be the index key Var, possibly relabeled */ @@ -1020,12 +1091,9 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, (int) nodeTag(clause)); } + Assert(n_runtime_keys <= max_runtime_keys); + /* Get rid of any unused arrays */ - if (n_runtime_keys == 0) - { - pfree(runtime_keys); - runtime_keys = NULL; - } if (n_array_keys == 0) { pfree(array_keys); diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index e8ce5bc02b3..98d1615514b 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -201,7 +201,7 @@ MJExamineQuals(List *mergeclauses, clause->rexpr = ExecInitExpr((Expr *) lsecond(qual->args), parent); /* Extract the operator's declared left/right datatypes */ - get_op_opfamily_properties(qual->opno, opfamily, + get_op_opfamily_properties(qual->opno, opfamily, false, &op_strategy, &op_lefttype, &op_righttype); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 0e0b4dc598a..4506518768d 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -363,6 +363,8 @@ _copyIndexScan(IndexScan *from) COPY_SCALAR_FIELD(indexid); COPY_NODE_FIELD(indexqual); COPY_NODE_FIELD(indexqualorig); + COPY_NODE_FIELD(indexorderby); + COPY_NODE_FIELD(indexorderbyorig); COPY_SCALAR_FIELD(indexorderdir); return newnode; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index afbfccabda5..5d09e16477d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -439,6 +439,8 @@ _outIndexScan(StringInfo str, IndexScan *node) WRITE_OID_FIELD(indexid); WRITE_NODE_FIELD(indexqual); WRITE_NODE_FIELD(indexqualorig); + WRITE_NODE_FIELD(indexorderby); + WRITE_NODE_FIELD(indexorderbyorig); WRITE_ENUM_FIELD(indexorderdir, ScanDirection); } @@ -1424,6 +1426,7 @@ _outIndexPath(StringInfo str, IndexPath *node) WRITE_NODE_FIELD(indexinfo); WRITE_NODE_FIELD(indexclauses); WRITE_NODE_FIELD(indexquals); + WRITE_NODE_FIELD(indexorderbys); WRITE_BOOL_FIELD(isjoininner); WRITE_ENUM_FIELD(indexscandir, ScanDirection); WRITE_FLOAT_FIELD(indextotalcost, "%.2f"); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 0724f9a6c9c..e6edbdb1e84 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -209,6 +209,7 @@ cost_seqscan(Path *path, PlannerInfo *root, * * 'index' is the index to be used * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) + * 'indexOrderBys' is the list of ORDER BY operators for amcanorderbyop indexes * 'outer_rel' is the outer relation when we are considering using the index * scan as the inside of a nestloop join (hence, some of the indexQuals * are join clauses, and we should expect repeated scans of the index); @@ -218,18 +219,19 @@ cost_seqscan(Path *path, PlannerInfo *root, * additional fields of the IndexPath besides startup_cost and total_cost. * These fields are needed if the IndexPath is used in a BitmapIndexScan. * + * indexQuals is a list of RestrictInfo nodes, but indexOrderBys is a list of + * bare expressions. + * * NOTE: 'indexQuals' must contain only clauses usable as index restrictions. * Any additional quals evaluated as qpquals may reduce the number of returned * tuples, but they won't reduce the number of tuples we have to fetch from * the table, so they don't reduce the scan cost. - * - * NOTE: as of 8.0, indexQuals is a list of RestrictInfo nodes, where formerly - * it was a list of bare clause expressions. */ void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index, List *indexQuals, + List *indexOrderBys, RelOptInfo *outer_rel) { RelOptInfo *baserel = index->rel; @@ -263,10 +265,11 @@ cost_index(IndexPath *path, PlannerInfo *root, * the fraction of main-table tuples we will have to retrieve) and its * correlation to the main-table tuple order. */ - OidFunctionCall8(index->amcostestimate, + OidFunctionCall9(index->amcostestimate, PointerGetDatum(root), PointerGetDatum(index), PointerGetDatum(indexQuals), + PointerGetDatum(indexOrderBys), PointerGetDatum(outer_rel), PointerGetDatum(&indexStartupCost), PointerGetDatum(&indexTotalCost), diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index f73e0e6dc60..90ccb3928b9 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -89,6 +89,9 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index, Oid opfamily, RowCompareExpr *clause, Relids outer_relids); +static List *match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys); +static Expr *match_clause_to_ordering_op(IndexOptInfo *index, + int indexcol, Expr *clause, Oid pk_opfamily); static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel); static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids); @@ -286,6 +289,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); IndexPath *ipath; List *restrictclauses; + List *orderbyclauses; List *index_pathkeys; List *useful_pathkeys; bool useful_predicate; @@ -388,9 +392,24 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, ForwardScanDirection); useful_pathkeys = truncate_useless_pathkeys(root, rel, index_pathkeys); + orderbyclauses = NIL; + } + else if (index->amcanorderbyop && possibly_useful_pathkeys && + istoplevel && outer_rel == NULL && scantype != ST_BITMAPSCAN) + { + /* see if we can generate ordering operators for query_pathkeys */ + orderbyclauses = match_index_to_pathkeys(index, + root->query_pathkeys); + if (orderbyclauses) + useful_pathkeys = root->query_pathkeys; + else + useful_pathkeys = NIL; } else + { useful_pathkeys = NIL; + orderbyclauses = NIL; + } /* * 3. Generate an indexscan path if there are relevant restriction @@ -402,6 +421,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, { ipath = create_index_path(root, index, restrictclauses, + orderbyclauses, useful_pathkeys, index_is_ordered ? ForwardScanDirection : @@ -425,6 +445,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, { ipath = create_index_path(root, index, restrictclauses, + NIL, useful_pathkeys, BackwardScanDirection, outer_rel); @@ -1385,6 +1406,179 @@ match_rowcompare_to_indexcol(IndexOptInfo *index, /**************************************************************************** + * ---- ROUTINES TO CHECK ORDERING OPERATORS ---- + ****************************************************************************/ + +/* + * match_index_to_pathkeys + * Test whether an index can produce output ordered according to the + * given pathkeys using "ordering operators". + * + * If it can, return a list of suitable ORDER BY expressions, each of the form + * "indexedcol operator pseudoconstant". If not, return NIL. + */ +static List * +match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys) +{ + List *orderbyexprs = NIL; + ListCell *lc1; + + /* Only indexes with the amcanorderbyop property are interesting here */ + if (!index->amcanorderbyop) + return NIL; + + foreach(lc1, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc1); + bool found = false; + ListCell *lc2; + + /* + * Note: for any failure to match, we just return NIL immediately. + * There is no value in matching just some of the pathkeys. + */ + + /* Pathkey must request default sort order for the target opfamily */ + if (pathkey->pk_strategy != BTLessStrategyNumber || + pathkey->pk_nulls_first) + return NIL; + + /* If eclass is volatile, no hope of using an indexscan */ + if (pathkey->pk_eclass->ec_has_volatile) + return NIL; + + /* Try to match eclass member expression(s) to index */ + foreach(lc2, pathkey->pk_eclass->ec_members) + { + EquivalenceMember *member = (EquivalenceMember *) lfirst(lc2); + int indexcol; + + /* No possibility of match if it references other relations */ + if (!bms_equal(member->em_relids, index->rel->relids)) + continue; + + for (indexcol = 0; indexcol < index->ncolumns; indexcol++) + { + Expr *expr; + + expr = match_clause_to_ordering_op(index, + indexcol, + member->em_expr, + pathkey->pk_opfamily); + if (expr) + { + orderbyexprs = lappend(orderbyexprs, expr); + found = true; + break; + } + } + + if (found) /* don't want to look at remaining members */ + break; + } + + if (!found) /* fail if no match for this pathkey */ + return NIL; + } + + return orderbyexprs; /* success! */ +} + +/* + * match_clause_to_ordering_op + * Determines whether an ordering operator expression matches an + * index column. + * + * This is similar to, but simpler than, match_clause_to_indexcol. + * We only care about simple OpExpr cases. The input is a bare + * expression that is being ordered by, which must be of the form + * (indexkey op const) or (const op indexkey) where op is an ordering + * operator for the column's opfamily. + * + * 'index' is the index of interest. + * 'indexcol' is a column number of 'index' (counting from 0). + * 'clause' is the ordering expression to be tested. + * 'pk_opfamily' is the btree opfamily describing the required sort order. + * + * If successful, return 'clause' as-is if the indexkey is on the left, + * otherwise a commuted copy of 'clause'. If no match, return NULL. + */ +static Expr * +match_clause_to_ordering_op(IndexOptInfo *index, + int indexcol, + Expr *clause, + Oid pk_opfamily) +{ + Oid opfamily = index->opfamily[indexcol]; + Node *leftop, + *rightop; + Oid expr_op; + Oid sortfamily; + bool commuted; + + /* + * Clause must be a binary opclause. + */ + if (!is_opclause(clause)) + return NULL; + leftop = get_leftop(clause); + rightop = get_rightop(clause); + if (!leftop || !rightop) + return NULL; + expr_op = ((OpExpr *) clause)->opno; + + /* + * Check for clauses of the form: (indexkey operator constant) or + * (constant operator indexkey). + */ + if (match_index_to_operand(leftop, indexcol, index) && + !contain_var_clause(rightop) && + !contain_volatile_functions(rightop)) + { + commuted = false; + } + else if (match_index_to_operand(rightop, indexcol, index) && + !contain_var_clause(leftop) && + !contain_volatile_functions(leftop)) + { + /* Might match, but we need a commuted operator */ + expr_op = get_commutator(expr_op); + if (expr_op == InvalidOid) + return NULL; + commuted = true; + } + else + return NULL; + + /* + * Is the (commuted) operator an ordering operator for the opfamily? + * And if so, does it yield the right sorting semantics? + */ + sortfamily = get_op_opfamily_sortfamily(expr_op, opfamily); + if (sortfamily != pk_opfamily) + return NULL; + + /* We have a match. Return clause or a commuted version thereof. */ + if (commuted) + { + OpExpr *newclause = makeNode(OpExpr); + + /* flat-copy all the fields of clause */ + memcpy(newclause, clause, sizeof(OpExpr)); + + /* commute it */ + newclause->opno = expr_op; + newclause->opfuncid = InvalidOid; + newclause->args = list_make2(rightop, leftop); + + clause = (Expr *) newclause; + } + + return clause; +} + + +/**************************************************************************** * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ---- ****************************************************************************/ @@ -2630,7 +2824,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo, expr_op = linitial_oid(clause->opnos); if (!var_on_left) expr_op = get_commutator(expr_op); - get_op_opfamily_properties(expr_op, index->opfamily[indexcol], + get_op_opfamily_properties(expr_op, index->opfamily[indexcol], false, &op_strategy, &op_lefttype, &op_righttype); @@ -2698,7 +2892,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo, break; /* Add opfamily and datatypes to lists */ - get_op_opfamily_properties(expr_op, index->opfamily[i], + get_op_opfamily_properties(expr_op, index->opfamily[i], false, &op_strategy, &op_lefttype, &op_righttype); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 41ad512a296..1bbf35ed74d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -81,6 +81,8 @@ static Node *replace_nestloop_params(PlannerInfo *root, Node *expr); static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root); static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, List *indexquals); +static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path, + List *indexorderbys); static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index); static List *get_switched_clauses(List *clauses, Relids outerrelids); static List *order_qual_clauses(PlannerInfo *root, List *clauses); @@ -89,6 +91,7 @@ static void copy_plan_costsize(Plan *dest, Plan *src); static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig, + List *indexorderby, List *indexorderbyorig, ScanDirection indexscandir); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, List *indexqual, @@ -1028,11 +1031,13 @@ create_indexscan_plan(PlannerInfo *root, List *scan_clauses) { List *indexquals = best_path->indexquals; + List *indexorderbys = best_path->indexorderbys; Index baserelid = best_path->path.parent->relid; Oid indexoid = best_path->indexinfo->indexoid; List *qpqual; List *stripped_indexquals; List *fixed_indexquals; + List *fixed_indexorderbys; ListCell *l; IndexScan *scan_plan; @@ -1053,6 +1058,11 @@ create_indexscan_plan(PlannerInfo *root, fixed_indexquals = fix_indexqual_references(root, best_path, indexquals); /* + * Likewise fix up index attr references in the ORDER BY expressions. + */ + fixed_indexorderbys = fix_indexorderby_references(root, best_path, indexorderbys); + + /* * If this is an innerjoin scan, the indexclauses will contain join * clauses that are not present in scan_clauses (since the passed-in value * is just the rel's baserestrictinfo list). We must add these clauses to @@ -1123,11 +1133,12 @@ create_indexscan_plan(PlannerInfo *root, /* * We have to replace any outer-relation variables with nestloop params - * in the indexqualorig and qpqual expressions. A bit annoying to have to - * do this separately from the processing in fix_indexqual_references --- - * rethink this when generalizing the inner indexscan support. But note - * we can't really do this earlier because it'd break the comparisons to - * predicates above ... (or would it? Those wouldn't have outer refs) + * in the indexqualorig, qpqual, and indexorderbyorig expressions. A bit + * annoying to have to do this separately from the processing in + * fix_indexqual_references --- rethink this when generalizing the inner + * indexscan support. But note we can't really do this earlier because + * it'd break the comparisons to predicates above ... (or would it? Those + * wouldn't have outer refs) */ if (best_path->isjoininner) { @@ -1135,6 +1146,8 @@ create_indexscan_plan(PlannerInfo *root, replace_nestloop_params(root, (Node *) stripped_indexquals); qpqual = (List *) replace_nestloop_params(root, (Node *) qpqual); + indexorderbys = (List *) + replace_nestloop_params(root, (Node *) indexorderbys); } /* Finally ready to build the plan node */ @@ -1144,6 +1157,8 @@ create_indexscan_plan(PlannerInfo *root, indexoid, fixed_indexquals, stripped_indexquals, + fixed_indexorderbys, + indexorderbys, best_path->indexscandir); copy_path_costsize(&scan_plan->scan.plan, &best_path->path); @@ -2395,6 +2410,63 @@ fix_indexqual_references(PlannerInfo *root, IndexPath *index_path, } /* + * fix_indexorderby_references + * Adjust indexorderby clauses to the form the executor's index + * machinery needs. + * + * This is a simplified version of fix_indexqual_references. The input does + * not have RestrictInfo nodes, and we assume that indxqual.c already + * commuted the clauses to put the index keys on the left. Also, we don't + * bother to support any cases except simple OpExprs, since nothing else + * is allowed for ordering operators. + */ +static List * +fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path, + List *indexorderbys) +{ + IndexOptInfo *index = index_path->indexinfo; + List *fixed_indexorderbys; + ListCell *l; + + fixed_indexorderbys = NIL; + + foreach(l, indexorderbys) + { + Node *clause = (Node *) lfirst(l); + + /* + * Replace any outer-relation variables with nestloop params. + * + * This also makes a copy of the clause, so it's safe to modify it + * in-place below. + */ + clause = replace_nestloop_params(root, clause); + + if (IsA(clause, OpExpr)) + { + OpExpr *op = (OpExpr *) clause; + + if (list_length(op->args) != 2) + elog(ERROR, "indexorderby clause is not binary opclause"); + + /* + * Now, determine which index attribute this is and change the + * indexkey operand as needed. + */ + linitial(op->args) = fix_indexqual_operand(linitial(op->args), + index); + } + else + elog(ERROR, "unsupported indexorderby type: %d", + (int) nodeTag(clause)); + + fixed_indexorderbys = lappend(fixed_indexorderbys, clause); + } + + return fixed_indexorderbys; +} + +/* * fix_indexqual_operand * Convert an indexqual expression to a Var referencing the index column. */ @@ -2685,6 +2757,8 @@ make_indexscan(List *qptlist, Oid indexid, List *indexqual, List *indexqualorig, + List *indexorderby, + List *indexorderbyorig, ScanDirection indexscandir) { IndexScan *node = makeNode(IndexScan); @@ -2699,6 +2773,8 @@ make_indexscan(List *qptlist, node->indexid = indexid; node->indexqual = indexqual; node->indexqualorig = indexqualorig; + node->indexorderby = indexorderby; + node->indexorderbyorig = indexorderbyorig; node->indexorderdir = indexscandir; return node; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a1e59005921..6d0b3dbce95 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3135,7 +3135,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) /* Estimate the cost of index scan */ indexScanPath = create_index_path(root, indexInfo, - NIL, NIL, + NIL, NIL, NIL, ForwardScanDirection, NULL); return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 9aef7fc35a2..0074679207a 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -301,6 +301,10 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset) fix_scan_list(glob, splan->indexqual, rtoffset); splan->indexqualorig = fix_scan_list(glob, splan->indexqualorig, rtoffset); + splan->indexorderby = + fix_scan_list(glob, splan->indexorderby, rtoffset); + splan->indexorderbyorig = + fix_scan_list(glob, splan->indexorderbyorig, rtoffset); } break; case T_BitmapIndexScan: diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 754753cc12d..39ef420284d 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1942,10 +1942,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, case T_IndexScan: finalize_primnode((Node *) ((IndexScan *) plan)->indexqual, &context); + finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby, + &context); /* * we need not look at indexqualorig, since it will have the same - * param references as indexqual. + * param references as indexqual. Likewise, we can ignore + * indexorderbyorig. */ context.paramids = bms_add_members(context.paramids, scan_params); break; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 231d221b21e..2439d814ce8 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -414,6 +414,8 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel) * 'index' is a usable index. * 'clause_groups' is a list of lists of RestrictInfo nodes * to be used as index qual conditions in the scan. + * 'indexorderbys' is a list of bare expressions (no RestrictInfos) + * to be used as index ordering operators in the scan. * 'pathkeys' describes the ordering of the path. * 'indexscandir' is ForwardScanDirection or BackwardScanDirection * for an ordered index, or NoMovementScanDirection for @@ -427,6 +429,7 @@ IndexPath * create_index_path(PlannerInfo *root, IndexOptInfo *index, List *clause_groups, + List *indexorderbys, List *pathkeys, ScanDirection indexscandir, RelOptInfo *outer_rel) @@ -463,6 +466,7 @@ create_index_path(PlannerInfo *root, pathnode->indexinfo = index; pathnode->indexclauses = allclauses; pathnode->indexquals = indexquals; + pathnode->indexorderbys = indexorderbys; pathnode->isjoininner = (outer_rel != NULL); pathnode->indexscandir = indexscandir; @@ -504,7 +508,7 @@ create_index_path(PlannerInfo *root, pathnode->rows = rel->rows; } - cost_index(pathnode, root, index, indexquals, outer_rel); + cost_index(pathnode, root, index, indexquals, indexorderbys, outer_rel); return pathnode; } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 95397aa7cee..ef87f724ae9 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2631,7 +2631,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause, examine_variable(root, right, 0, &rightvar); /* Extract the operator's declared left/right datatypes */ - get_op_opfamily_properties(opno, opfamily, + get_op_opfamily_properties(opno, opfamily, false, &op_strategy, &op_lefttype, &op_righttype); @@ -4646,7 +4646,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, if (min) { index_scan = index_beginscan(heapRel, indexRel, SnapshotNow, - 1, scankeys); + 1, 0); + index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first tuple in sortop's direction */ if ((tup = index_getnext(index_scan, @@ -4677,7 +4678,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, if (max && have_data) { index_scan = index_beginscan(heapRel, indexRel, SnapshotNow, - 1, scankeys); + 1, 0); + index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first tuple in reverse direction */ if ((tup = index_getnext(index_scan, @@ -5644,7 +5646,9 @@ string_to_bytea_const(const char *str, size_t str_len) static void genericcostestimate(PlannerInfo *root, - IndexOptInfo *index, List *indexQuals, + IndexOptInfo *index, + List *indexQuals, + List *indexOrderBys, RelOptInfo *outer_rel, double numIndexTuples, Cost *indexStartupCost, @@ -5856,7 +5860,8 @@ genericcostestimate(PlannerInfo *root, * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per * indexqual operator. Because we have numIndexTuples as a per-scan * number, we have to multiply by num_sa_scans to get the correct result - * for ScalarArrayOpExpr cases. + * for ScalarArrayOpExpr cases. Similarly add in costs for any index + * ORDER BY expressions. * * Note: this neglects the possible costs of rechecking lossy operators * and OR-clause expressions. Detecting that that might be needed seems @@ -5864,11 +5869,15 @@ genericcostestimate(PlannerInfo *root, * inaccuracies here ... */ cost_qual_eval(&index_qual_cost, indexQuals, root); - qual_op_cost = cpu_operator_cost * list_length(indexQuals); - qual_arg_cost = index_qual_cost.startup + - index_qual_cost.per_tuple - qual_op_cost; + qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple; + cost_qual_eval(&index_qual_cost, indexOrderBys, root); + qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple; + qual_op_cost = cpu_operator_cost * + (list_length(indexQuals) + list_length(indexOrderBys)); + qual_arg_cost -= qual_op_cost; if (qual_arg_cost < 0) /* just in case... */ qual_arg_cost = 0; + *indexStartupCost = qual_arg_cost; *indexTotalCost += qual_arg_cost; *indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost); @@ -5901,11 +5910,12 @@ btcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); Oid relid; AttrNumber colnum; VariableStatData vardata; @@ -6082,7 +6092,8 @@ btcostestimate(PG_FUNCTION_ARGS) numIndexTuples = rint(numIndexTuples / num_sa_scans); } - genericcostestimate(root, index, indexQuals, outer_rel, numIndexTuples, + genericcostestimate(root, index, indexQuals, indexOrderBys, + outer_rel, numIndexTuples, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -6206,13 +6217,14 @@ hashcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = (double *) PG_GETARG_POINTER(7); - - genericcostestimate(root, index, indexQuals, outer_rel, 0.0, + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); + + genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -6225,13 +6237,14 @@ gistcostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = (double *) PG_GETARG_POINTER(7); - - genericcostestimate(root, index, indexQuals, outer_rel, 0.0, + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); + + genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0, indexStartupCost, indexTotalCost, indexSelectivity, indexCorrelation); @@ -6262,11 +6275,12 @@ gincostestimate(PG_FUNCTION_ARGS) PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1); List *indexQuals = (List *) PG_GETARG_POINTER(2); - RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3); - Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4); - Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5); - Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6); - double *indexCorrelation = (double *) PG_GETARG_POINTER(7); + List *indexOrderBys = (List *) PG_GETARG_POINTER(3); + RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4); + Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5); + Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6); + Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7); + double *indexCorrelation = (double *) PG_GETARG_POINTER(8); ListCell *l; int32 nfullscan = 0; List *selectivityQuals; @@ -6432,7 +6446,7 @@ gincostestimate(PG_FUNCTION_ARGS) * Get the operator's strategy number and declared input data types * within the index opfamily. */ - get_op_opfamily_properties(clause_op, index->opfamily[indexcol], + get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false, &strategy_op, &lefttype, &righttype); /* @@ -6581,15 +6595,18 @@ gincostestimate(PG_FUNCTION_ARGS) * Add on index qual eval costs, much as in genericcostestimate */ cost_qual_eval(&index_qual_cost, indexQuals, root); - qual_op_cost = cpu_operator_cost * list_length(indexQuals); - qual_arg_cost = index_qual_cost.startup + - index_qual_cost.per_tuple - qual_op_cost; + qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple; + cost_qual_eval(&index_qual_cost, indexOrderBys, root); + qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple; + qual_op_cost = cpu_operator_cost * + (list_length(indexQuals) + list_length(indexOrderBys)); + qual_arg_cost -= qual_op_cost; if (qual_arg_cost < 0) /* just in case... */ qual_arg_cost = 0; *indexStartupCost += qual_arg_cost; *indexTotalCost += qual_arg_cost; - *indexTotalCost += ( numTuples * *indexSelectivity ) * (cpu_index_tuple_cost + qual_op_cost); + *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); PG_RETURN_VOID(); } diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 9beae0d9ef1..cbdfe05031f 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -86,18 +86,41 @@ get_op_opfamily_strategy(Oid opno, Oid opfamily) } /* + * get_op_opfamily_sortfamily + * + * If the operator is an ordering operator within the specified opfamily, + * return its amopsortfamily OID; else return InvalidOid. + */ +Oid +get_op_opfamily_sortfamily(Oid opno, Oid opfamily) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + Oid result; + + tp = SearchSysCache3(AMOPOPID, + ObjectIdGetDatum(opno), + CharGetDatum(AMOP_ORDER), + ObjectIdGetDatum(opfamily)); + if (!HeapTupleIsValid(tp)) + return InvalidOid; + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + result = amop_tup->amopsortfamily; + ReleaseSysCache(tp); + return result; +} + +/* * get_op_opfamily_properties * * Get the operator's strategy number and declared input data types * within the specified opfamily. * - * This function only considers search operators, not ordering operators. - * * Caller should already have verified that opno is a member of opfamily, * therefore we raise an error if the tuple is not found. */ void -get_op_opfamily_properties(Oid opno, Oid opfamily, +get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype) @@ -107,7 +130,7 @@ get_op_opfamily_properties(Oid opno, Oid opfamily, tp = SearchSysCache3(AMOPOPID, ObjectIdGetDatum(opno), - CharGetDatum(AMOP_SEARCH), + CharGetDatum(ordering_op ? AMOP_ORDER : AMOP_SEARCH), ObjectIdGetDatum(opfamily)); if (!HeapTupleIsValid(tp)) elog(ERROR, "operator %u is not a member of opfamily %u", |