diff options
author | Robert Haas <rhaas@postgresql.org> | 2015-01-19 15:20:31 -0500 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2015-01-19 15:28:27 -0500 |
commit | 4ea51cdfe85ceef8afabceb03c446574daa0ac23 (patch) | |
tree | 703433dfca58c84f8b947b6f4f562ecdf98f1669 /src/backend/executor | |
parent | 1605291b6c14be92915948d17f5509191632c97f (diff) | |
download | postgresql-4ea51cdfe85ceef8afabceb03c446574daa0ac23.tar.gz postgresql-4ea51cdfe85ceef8afabceb03c446574daa0ac23.zip |
Use abbreviated keys for faster sorting of text datums.
This commit extends the SortSupport infrastructure to allow operator
classes the option to provide abbreviated representations of Datums;
in the case of text, we abbreviate by taking the first few characters
of the strxfrm() blob. If the abbreviated comparison is insufficent
to resolve the comparison, we fall back on the normal comparator.
This can be much faster than the old way of doing sorting if the
first few bytes of the string are usually sufficient to resolve the
comparison.
There is the potential for a performance regression if all of the
strings to be sorted are identical for the first 8+ characters and
differ only in later positions; therefore, the SortSupport machinery
now provides an infrastructure to abort the use of abbreviation if
it appears that abbreviation is producing comparatively few distinct
keys. HyperLogLog, a streaming cardinality estimator, is included in
this commit and used to make that determination for text.
Peter Geoghegan, reviewed by me.
Diffstat (limited to 'src/backend/executor')
-rw-r--r-- | src/backend/executor/nodeAgg.c | 4 | ||||
-rw-r--r-- | src/backend/executor/nodeMergeAppend.c | 9 | ||||
-rw-r--r-- | src/backend/executor/nodeMergejoin.c | 8 |
3 files changed, 21 insertions, 0 deletions
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 08088ea3c0c..8079d977643 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -363,6 +363,10 @@ initialize_aggregates(AggState *aggstate, * We use a plain Datum sorter when there's a single input column; * otherwise sort the full tuple. (See comments for * process_ordered_aggregate_single.) + * + * In the future, we should consider forcing the + * tuplesort_begin_heap() case when the abbreviated key + * optimization can thereby be used, even when numInputs is 1. */ peraggstate->sortstate = (peraggstate->numInputs == 1) ? diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 4e200a8e346..0c814f0e72d 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -137,6 +137,15 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) sortKey->ssup_nulls_first = node->nullsFirst[i]; sortKey->ssup_attno = node->sortColIdx[i]; + /* + * It isn't feasible to perform abbreviated key conversion, since + * tuples are pulled into mergestate's binary heap as needed. It would + * likely be counter-productive to convert tuples into an abbreviated + * representation as they're pulled up, so opt out of that additional + * optimization entirely. + */ + sortKey->abbreviate = false; + PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey); } diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 2a5a7acf945..15742c574ad 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -229,6 +229,14 @@ MJExamineQuals(List *mergeclauses, elog(ERROR, "cannot merge using non-equality operator %u", qual->opno); + /* + * sortsupport routine must know if abbreviation optimization is + * applicable in principle. It is never applicable for merge joins + * because there is no convenient opportunity to convert to alternative + * representation. + */ + clause->ssup.abbreviate = false; + /* And get the matching support or comparison function */ Assert(clause->ssup.comparator == NULL); sortfunc = get_opfamily_proc(opfamily, |