aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2006-03-07 19:06:50 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2006-03-07 19:06:50 +0000
commit8db05ba411ecd3ce2cb0cb7c78fec87658e1a4ab (patch)
tree7e0fd7aaff03dcd83ef3e4ac0232ae57c268f628 /src
parente6107da53c93dc188e257d72c3412510d2584093 (diff)
downloadpostgresql-8db05ba411ecd3ce2cb0cb7c78fec87658e1a4ab.tar.gz
postgresql-8db05ba411ecd3ce2cb0cb7c78fec87658e1a4ab.zip
Repair old performance bug in tuplesort.c/logtape.c. In the case where
we are doing the final merge pass on-the-fly, and not writing the data back onto a 'tape', the number of free blocks in the tape set will become large, leading to a lot of time wasted in ltsReleaseBlock(). There is really no need to track the free blocks anymore in this state, so add a simple shutoff switch. Per report from Stefan Kaltenbrunner.
Diffstat (limited to 'src')
-rw-r--r--src/backend/utils/sort/logtape.c29
-rw-r--r--src/backend/utils/sort/tuplesort.c50
-rw-r--r--src/include/utils/logtape.h3
3 files changed, 65 insertions, 17 deletions
diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c
index 4f280509cad..6dc203063c9 100644
--- a/src/backend/utils/sort/logtape.c
+++ b/src/backend/utils/sort/logtape.c
@@ -64,7 +64,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.19 2006/03/05 15:58:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.20 2006/03/07 19:06:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -146,7 +146,12 @@ struct LogicalTapeSet
* When there are no such blocks, we extend the underlying file. Note
* that the block numbers in freeBlocks are always in *decreasing* order,
* so that removing the last entry gives us the lowest free block.
+ *
+ * If forgetFreeSpace is true then any freed blocks are simply forgotten
+ * rather than being remembered in freeBlocks[]. See notes for
+ * LogicalTapeSetForgetFreeSpace().
*/
+ bool forgetFreeSpace; /* are we remembering free blocks? */
long *freeBlocks; /* resizable array */
int nFreeBlocks; /* # of currently free blocks */
int freeBlocksLen; /* current allocated length of freeBlocks[] */
@@ -248,6 +253,12 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
long *ptr;
/*
+ * Do nothing if we're no longer interested in remembering free space.
+ */
+ if (lts->forgetFreeSpace)
+ return;
+
+ /*
* Enlarge freeBlocks array if full.
*/
if (lts->nFreeBlocks >= lts->freeBlocksLen)
@@ -491,6 +502,7 @@ LogicalTapeSetCreate(int ntapes)
(ntapes - 1) * sizeof(LogicalTape));
lts->pfile = BufFileCreateTemp(false);
lts->nFileBlocks = 0L;
+ lts->forgetFreeSpace = false;
lts->freeBlocksLen = 32; /* reasonable initial guess */
lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
lts->nFreeBlocks = 0;
@@ -547,6 +559,21 @@ LogicalTapeSetClose(LogicalTapeSet *lts)
}
/*
+ * Mark a logical tape set as not needing management of free space anymore.
+ *
+ * This should be called if the caller does not intend to write any more data
+ * into the tape set, but is reading from un-frozen tapes. Since no more
+ * writes are planned, remembering free blocks is no longer useful. Setting
+ * this flag lets us avoid wasting time and space in ltsReleaseBlock(), which
+ * is not designed to handle large numbers of free blocks.
+ */
+void
+LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts)
+{
+ lts->forgetFreeSpace = true;
+}
+
+/*
* Dump the dirty buffer of a logical tape.
*/
static void
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index a9d5d79e033..af0109fda65 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -91,7 +91,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.62 2006/03/05 15:58:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.63 2006/03/07 19:06:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1384,7 +1384,8 @@ mergeruns(Tuplesortstate *state)
/*
* If we produced only one initial run (quite likely if the total data
* volume is between 1X and 2X workMem), we can just use that tape as the
- * finished output, rather than doing a useless merge.
+ * finished output, rather than doing a useless merge. (This obvious
+ * optimization is not in Knuth's algorithm.)
*/
if (state->currentRun == 1)
{
@@ -1401,33 +1402,51 @@ mergeruns(Tuplesortstate *state)
for (;;)
{
- /* Step D5: merge runs onto tape[T] until tape[P] is empty */
- while (state->tp_runs[state->tapeRange - 1] ||
- state->tp_dummy[state->tapeRange - 1])
+ /*
+ * At this point we know that tape[T] is empty. If there's just one
+ * (real or dummy) run left on each input tape, then only one merge
+ * pass remains. If we don't have to produce a materialized sorted
+ * tape, we can stop at this point and do the final merge on-the-fly.
+ */
+ if (!state->randomAccess)
{
- bool allDummy = true;
bool allOneRun = true;
+ Assert(state->tp_runs[state->tapeRange] == 0);
for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
{
- if (state->tp_dummy[tapenum] == 0)
- allDummy = false;
if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1)
+ {
allOneRun = false;
+ break;
+ }
}
-
- /*
- * If we don't have to produce a materialized sorted tape, quit as
- * soon as we're down to one real/dummy run per tape.
- */
- if (!state->randomAccess && allOneRun)
+ if (allOneRun)
{
- Assert(!allDummy);
+ /* Tell logtape.c we won't be writing anymore */
+ LogicalTapeSetForgetFreeSpace(state->tapeset);
/* Initialize for the final merge pass */
beginmerge(state);
state->status = TSS_FINALMERGE;
return;
}
+ }
+
+ /* Step D5: merge runs onto tape[T] until tape[P] is empty */
+ while (state->tp_runs[state->tapeRange - 1] ||
+ state->tp_dummy[state->tapeRange - 1])
+ {
+ bool allDummy = true;
+
+ for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+ {
+ if (state->tp_dummy[tapenum] == 0)
+ {
+ allDummy = false;
+ break;
+ }
+ }
+
if (allDummy)
{
state->tp_dummy[state->tapeRange]++;
@@ -1437,6 +1456,7 @@ mergeruns(Tuplesortstate *state)
else
mergeonerun(state);
}
+
/* Step D6: decrease level */
if (--state->Level == 0)
break;
diff --git a/src/include/utils/logtape.h b/src/include/utils/logtape.h
index 3791f03e9e3..7cd3db37774 100644
--- a/src/include/utils/logtape.h
+++ b/src/include/utils/logtape.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.14 2006/03/05 15:59:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.15 2006/03/07 19:06:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,6 +26,7 @@ typedef struct LogicalTapeSet LogicalTapeSet;
extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes);
extern void LogicalTapeSetClose(LogicalTapeSet *lts);
+extern void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts);
extern size_t LogicalTapeRead(LogicalTapeSet *lts, int tapenum,
void *ptr, size_t size);
extern void LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,