aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTomas Vondra <tomas.vondra@postgresql.org>2023-12-30 22:50:54 +0100
committerTomas Vondra <tomas.vondra@postgresql.org>2023-12-30 23:15:04 +0100
commit6c63bcbf3ccf555f02a8fd8b2acaf01a6e94cb97 (patch)
treec7bfb309c0aa09e27ba008b3c01e8bea1ef9c249 /src
parent5632d6e18a8afd586a99f680eddddc38b4ead769 (diff)
downloadpostgresql-6c63bcbf3ccf555f02a8fd8b2acaf01a6e94cb97.tar.gz
postgresql-6c63bcbf3ccf555f02a8fd8b2acaf01a6e94cb97.zip
Minor cleanup of the BRIN parallel build code
Commit b437571714 added support for parallel builds for BRIN indexes, using code similar to BTREE parallel builds, and also a new tuplesort variant. This commit simplifies the new code in two ways: * The "spool" grouping tuplesort and the heap/index is not necessary. The heap/index are available as separate arguments, causing confusion. So remove the spool, and use the tuplesort directly. * The new tuplesort variant does not need the heap/index, as it sorts simply by the range block number, without accessing the tuple data. So simplify that too. Initial report and patch by Ranier Vilela, further cleanup by me. Author: Ranier Vilela Discussion: https://postgr.es/m/CAEudQAqD7f2i4iyEaAz-5o-bf6zXVX-AkNUBm-YjUXEemaEh6A%40mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/brin/brin.c98
-rw-r--r--src/backend/utils/sort/tuplesortvariants.c26
-rw-r--r--src/include/utils/tuplesort.h4
-rw-r--r--src/tools/pgindent/typedefs.list1
4 files changed, 39 insertions, 90 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 23f081389b2..5e662a4d4ee 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -51,16 +51,6 @@
#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005)
/*
- * Status record for spooling/sorting phase.
- */
-typedef struct BrinSpool
-{
- Tuplesortstate *sortstate; /* state data for tuplesort.c */
- Relation heap;
- Relation index;
-} BrinSpool;
-
-/*
* Status for index builds performed in parallel. This is allocated in a
* dynamic shared memory segment.
*/
@@ -183,7 +173,13 @@ typedef struct BrinBuildState
*/
BrinLeader *bs_leader;
int bs_worker_id;
- BrinSpool *bs_spool;
+
+ /*
+ * The sortstate is used by workers (including the leader). It has to be
+ * part of the build state, because that's the only thing passed to the
+ * build callback etc.
+ */
+ Tuplesortstate *bs_sortstate;
} BrinBuildState;
/*
@@ -231,12 +227,11 @@ static void brin_fill_empty_ranges(BrinBuildState *state,
/* parallel index builds */
static void _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index,
bool isconcurrent, int request);
-static void _brin_end_parallel(BrinLeader *btleader, BrinBuildState *state);
+static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state);
static Size _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
static void _brin_leader_participate_as_worker(BrinBuildState *buildstate,
Relation heap, Relation index);
static void _brin_parallel_scan_and_build(BrinBuildState *buildstate,
- BrinSpool *brinspool,
BrinShared *brinshared,
Sharedsort *sharedsort,
Relation heap, Relation index,
@@ -1143,10 +1138,6 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
state = initialize_brin_buildstate(index, revmap, pagesPerRange,
RelationGetNumberOfBlocks(heap));
- state->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
- state->bs_spool->heap = heap;
- state->bs_spool->index = index;
-
/*
* Attempt to launch parallel worker scan when required
*
@@ -1160,11 +1151,13 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
indexInfo->ii_ParallelWorkers);
/*
- * Now scan the relation. No syncscan allowed here because we want the
- * heap blocks in physical order.
- *
* If parallel build requested and at least one worker process was
- * successfully launched, set up coordination state
+ * successfully launched, set up coordination state, wait for workers to
+ * complete. Then read all tuples from the shared tuplesort and insert
+ * them into the index.
+ *
+ * In serial mode, simply scan the table and build the index one index
+ * tuple at a time.
*/
if (state->bs_leader)
{
@@ -1176,9 +1169,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
state->bs_leader->nparticipanttuplesorts;
coordinate->sharedsort = state->bs_leader->sharedsort;
-
/*
- * Begin serial/leader tuplesort.
+ * Begin leader tuplesort.
*
* In cases where parallelism is involved, the leader receives the
* same share of maintenance_work_mem as a serial sort (it is
@@ -1199,19 +1191,18 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* INDEX operation, regardless of the use of parallelism or any other
* factor.
*/
- state->bs_spool->sortstate =
- tuplesort_begin_index_brin(heap, index,
- maintenance_work_mem, coordinate,
+ state->bs_sortstate =
+ tuplesort_begin_index_brin(maintenance_work_mem, coordinate,
TUPLESORT_NONE);
- /*
- * In parallel mode, wait for workers to complete, and then read all
- * tuples from the shared tuplesort and insert them into the index.
- */
_brin_end_parallel(state->bs_leader, state);
}
else /* no parallel index build */
{
+ /*
+ * Now scan the relation. No syncscan allowed here because we want
+ * the heap blocks in physical order.
+ */
reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
brinbuildCallback, (void *) state, NULL);
@@ -1671,7 +1662,7 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
state->bs_leader = NULL;
state->bs_worker_id = 0;
- state->bs_spool = NULL;
+ state->bs_sortstate = NULL;
state->bs_context = CurrentMemoryContext;
state->bs_emptyTuple = NULL;
state->bs_emptyTupleLen = 0;
@@ -2002,7 +1993,7 @@ form_and_spill_tuple(BrinBuildState *state)
state->bs_dtuple, &size);
/* write the BRIN tuple to the tuplesort */
- tuplesort_putbrintuple(state->bs_spool->sortstate, tup, size);
+ tuplesort_putbrintuple(state->bs_sortstate, tup, size);
state->bs_numtuples++;
@@ -2522,7 +2513,6 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
Size tuplen;
BrinShared *brinshared = brinleader->brinshared;
BlockNumber prevblkno = InvalidBlockNumber;
- BrinSpool *spool;
MemoryContext rangeCxt,
oldCxt;
@@ -2541,8 +2531,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
state->bs_numtuples = brinshared->indtuples;
/* do the actual sort in the leader */
- spool = state->bs_spool;
- tuplesort_performsort(spool->sortstate);
+ tuplesort_performsort(state->bs_sortstate);
/*
* Initialize BrinMemTuple we'll use to union summaries from workers (in
@@ -2568,7 +2557,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
* That probably gives us an index that is cheaper to scan, thanks to
* mostly getting data from the same index page as before.
*/
- while ((btup = tuplesort_getbrintuple(spool->sortstate, &tuplen, true)) != NULL)
+ while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
{
/* Ranges should be multiples of pages_per_range for the index. */
Assert(btup->bt_blkno % brinshared->pagesPerRange == 0);
@@ -2640,7 +2629,7 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
prevblkno = btup->bt_blkno;
}
- tuplesort_end(spool->sortstate);
+ tuplesort_end(state->bs_sortstate);
/* Fill the BRIN tuple for the last page range with data. */
if (prevblkno != InvalidBlockNumber)
@@ -2704,11 +2693,6 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
BrinLeader *brinleader = buildstate->bs_leader;
int sortmem;
- /* Allocate memory and initialize private spool */
- buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
- buildstate->bs_spool->heap = buildstate->bs_spool->heap;
- buildstate->bs_spool->index = buildstate->bs_spool->index;
-
/*
* Might as well use reliable figure when doling out maintenance_work_mem
* (when requested number of workers were not launched, this will be
@@ -2717,16 +2701,14 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
sortmem = maintenance_work_mem / brinleader->nparticipanttuplesorts;
/* Perform work common to all participants */
- _brin_parallel_scan_and_build(buildstate, buildstate->bs_spool, brinleader->brinshared,
+ _brin_parallel_scan_and_build(buildstate, brinleader->brinshared,
brinleader->sharedsort, heap, index, sortmem, true);
}
/*
* Perform a worker's portion of a parallel sort.
*
- * This generates a tuplesort for passed btspool, and a second tuplesort
- * state if a second btspool is need (i.e. for unique index builds). All
- * other spool fields should already be set when this is called.
+ * This generates a tuplesort for the worker portion of the table.
*
* sortmem is the amount of working memory to use within each worker,
* expressed in KBs.
@@ -2734,10 +2716,10 @@ _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Re
* When this returns, workers are done, and need only release resources.
*/
static void
-_brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
+_brin_parallel_scan_and_build(BrinBuildState *state,
BrinShared *brinshared, Sharedsort *sharedsort,
- Relation heap, Relation index, int sortmem,
- bool progress)
+ Relation heap, Relation index,
+ int sortmem, bool progress)
{
SortCoordinate coordinate;
TableScanDesc scan;
@@ -2751,10 +2733,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
coordinate->sharedsort = sharedsort;
/* Begin "partial" tuplesort */
- brinspool->sortstate = tuplesort_begin_index_brin(brinspool->heap,
- brinspool->index,
- sortmem, coordinate,
- TUPLESORT_NONE);
+ state->bs_sortstate = tuplesort_begin_index_brin(sortmem, coordinate,
+ TUPLESORT_NONE);
/* Join parallel scan */
indexInfo = BuildIndexInfo(index);
@@ -2770,7 +2750,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
form_and_spill_tuple(state);
/* sort the BRIN ranges built by this worker */
- tuplesort_performsort(brinspool->sortstate);
+ tuplesort_performsort(state->bs_sortstate);
state->bs_reltuples += reltuples;
@@ -2786,7 +2766,7 @@ _brin_parallel_scan_and_build(BrinBuildState *state, BrinSpool *brinspool,
/* Notify leader */
ConditionVariableSignal(&brinshared->workersdonecv);
- tuplesort_end(brinspool->sortstate);
+ tuplesort_end(state->bs_sortstate);
}
/*
@@ -2844,11 +2824,6 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
brinshared->pagesPerRange,
InvalidBlockNumber);
- /* Initialize worker's own spool */
- buildstate->bs_spool = (BrinSpool *) palloc0(sizeof(BrinSpool));
- buildstate->bs_spool->heap = heapRel;
- buildstate->bs_spool->index = indexRel;
-
/* Look up shared state private to tuplesort.c */
sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
tuplesort_attach_shared(sharedsort, seg);
@@ -2863,8 +2838,7 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
*/
sortmem = maintenance_work_mem / brinshared->scantuplesortstates;
- _brin_parallel_scan_and_build(buildstate, buildstate->bs_spool,
- brinshared, sharedsort,
+ _brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
heapRel, indexRel, sortmem, false);
/* Report WAL/buffer usage during parallel execution */
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index 90fc605f1ca..27425880a5f 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -138,16 +138,6 @@ typedef struct
} TuplesortIndexHashArg;
/*
- * Data struture pointed by "TuplesortPublic.arg" for the index_brin subcase.
- */
-typedef struct
-{
- TuplesortIndexArg index;
-
- /* XXX do we need something here? */
-} TuplesortIndexBrinArg;
-
-/*
* Data struture pointed by "TuplesortPublic.arg" for the Datum case.
* Set by tuplesort_begin_datum and used only by the DatumTuple routines.
*/
@@ -562,20 +552,13 @@ tuplesort_begin_index_gist(Relation heapRel,
}
Tuplesortstate *
-tuplesort_begin_index_brin(Relation heapRel,
- Relation indexRel,
- int workMem,
+tuplesort_begin_index_brin(int workMem,
SortCoordinate coordinate,
int sortopt)
{
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
sortopt);
TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext;
- TuplesortIndexBrinArg *arg;
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
- arg = (TuplesortIndexBrinArg *) palloc(sizeof(TuplesortIndexBrinArg));
#ifdef TRACE_SORT
if (trace_sort)
@@ -592,12 +575,7 @@ tuplesort_begin_index_brin(Relation heapRel,
base->writetup = writetup_index_brin;
base->readtup = readtup_index_brin;
base->haveDatum1 = true;
- base->arg = arg;
-
- arg->index.heapRel = heapRel;
- arg->index.indexRel = indexRel;
-
- MemoryContextSwitchTo(oldcontext);
+ base->arg = NULL;
return state;
}
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 357eb35311d..103ced4005f 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -430,9 +430,7 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
Relation indexRel,
int workMem, SortCoordinate coordinate,
int sortopt);
-extern Tuplesortstate *tuplesort_begin_index_brin(Relation heapRel,
- Relation indexRel,
- int workMem, SortCoordinate coordinate,
+extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
int sortopt);
extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
Oid sortOperator, Oid sortCollation,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e37ef9aa76d..3310726cdd6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -307,7 +307,6 @@ BrinRevmap
BrinShared
BrinSortTuple
BrinSpecialSpace
-BrinSpool
BrinStatsData
BrinTuple
BrinValues