about · summary · refs · log · tree · commit · diff
diff options
context:
space:
mode:
author:    Tomas Vondra <tomas.vondra@postgresql.org>  2021-06-11 20:19:48 +0200
committer: Tomas Vondra <tomas.vondra@postgresql.org>  2021-06-11 20:23:33 +0200
commit:    b676ac443b6a83558d4701b2dd9491c0b37e17c4 (patch)
tree:      2c2b6679178de4a7151f5781dcff723c6dcc85cc
parent:    96540f80f8334a3f0f4a13f0d42e4565d8fa9eb7 (diff)
download:  postgresql-b676ac443b6a83558d4701b2dd9491c0b37e17c4.tar.gz
download:  postgresql-b676ac443b6a83558d4701b2dd9491c0b37e17c4.zip
Optimize creation of slots for FDW bulk inserts
Commit b663a41363 introduced bulk inserts for FDW, but the handling of tuple slots turned out to be problematic for two reasons. Firstly, the slots were re-created for each individual batch. Secondly, all slots referenced the same tuple descriptor - with reasonably small batches this is not an issue, but with large batches this triggers O(N^2) behavior in the resource owner code. These two issues work against each other - to reduce the number of times a slot has to be created/dropped, larger batches are needed. However, the larger the batch, the more expensive the resource owner gets. For practical batch sizes (100 - 1000) this would not be a big problem, as the benefits (latency savings) greatly exceed the resource owner costs. But for extremely large batches it might be much worse, possibly even losing with non-batching mode. Fixed by initializing tuple slots only once (and reusing them across batches) and by using a new tuple descriptor copy for each slot. Discussion: https://postgr.es/m/ebbbcc7d-4286-8c28-0272-61b4753af761%40enterprisedb.com
-rw-r--r--  src/backend/executor/nodeModifyTable.c | 52
-rw-r--r--  src/include/nodes/execnodes.h          |  1
2 files changed, 37 insertions(+), 16 deletions(-)
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 379b0563105..88c479c6da3 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -703,16 +703,31 @@ ExecInsert(ModifyTableState *mtstate,
resultRelInfo->ri_BatchSize);
}
- resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots] =
- MakeSingleTupleTableSlot(slot->tts_tupleDescriptor,
- slot->tts_ops);
- ExecCopySlot(resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots],
- slot);
- resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots] =
- MakeSingleTupleTableSlot(planSlot->tts_tupleDescriptor,
- planSlot->tts_ops);
- ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots],
- planSlot);
+ /*
+ * Initialize the batch slots. We don't know how many slots will be
+ * needed, so we initialize them as the batch grows, and we keep
+ * them across batches. To mitigate an inefficiency in how resource
+ * owner handles objects with many references (as with many slots
+ * all referencing the same tuple descriptor) we copy the tuple
+ * descriptor for each slot.
+ */
+ if (resultRelInfo->ri_NumSlots >= resultRelInfo->ri_NumSlotsInitialized)
+ {
+ TupleDesc tdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
+
+ resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots] =
+ MakeSingleTupleTableSlot(tdesc, slot->tts_ops);
+ ExecCopySlot(resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots],
+ slot);
+
+ resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots] =
+ MakeSingleTupleTableSlot(tdesc, planSlot->tts_ops);
+ ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots],
+ planSlot);
+
+ /* remember how many batch slots we initialized */
+ resultRelInfo->ri_NumSlotsInitialized++;
+ }
resultRelInfo->ri_NumSlots++;
@@ -1034,12 +1049,6 @@ ExecBatchInsert(ModifyTableState *mtstate,
if (canSetTag && numInserted > 0)
estate->es_processed += numInserted;
-
- for (i = 0; i < numSlots; i++)
- {
- ExecDropSingleTupleTableSlot(slots[i]);
- ExecDropSingleTupleTableSlot(planSlots[i]);
- }
}
/* ----------------------------------------------------------------
@@ -3162,6 +3171,7 @@ ExecEndModifyTable(ModifyTableState *node)
*/
for (i = 0; i < node->mt_nrels; i++)
{
+ int j;
ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
if (!resultRelInfo->ri_usesFdwDirectModify &&
@@ -3169,6 +3179,16 @@ ExecEndModifyTable(ModifyTableState *node)
resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
resultRelInfo);
+
+ /*
+ * Cleanup the initialized batch slots. This only matters for FDWs with
+ * batching, but the other cases will have ri_NumSlotsInitialized == 0.
+ */
+ for (j = 0; j < resultRelInfo->ri_NumSlotsInitialized; j++)
+ {
+ ExecDropSingleTupleTableSlot(resultRelInfo->ri_Slots[j]);
+ ExecDropSingleTupleTableSlot(resultRelInfo->ri_PlanSlots[j]);
+ }
}
/*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 7795a694905..9a5ca7b3dbf 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -462,6 +462,7 @@ typedef struct ResultRelInfo
/* batch insert stuff */
int ri_NumSlots; /* number of slots in the array */
+ int ri_NumSlotsInitialized; /* number of initialized slots */
int ri_BatchSize; /* max slots inserted in a single batch */
TupleTableSlot **ri_Slots; /* input tuples for batch insert */
TupleTableSlot **ri_PlanSlots;