Fix rescan logic in nodeCtescan.

The previous coding essentially assumed that nodes would be rescanned in the same order they were initialized in; or at least that the "leader" of a group of CTEscans would be rescanned before any others were required to execute. Unfortunately, that isn't even a little bit true. It's possible to devise queries in which the leader isn't rescanned until other CTEscans on the same CTE have run to completion, or even in which the leader never gets a rescan call at all. The fix makes the leader specially responsible only for initial creation and final destruction of the tuplestore; rescan resets are now a symmetrically shared responsibility. This means that we might reset the tuplestore multiple times when restarting a plan subtree containing multiple CTEscans; but resetting an already-empty tuplestore is cheap enough that that doesn't seem like a problem. Per report from Adam Mackler; the new regression test cases are based on his example query. Back-patch to 8.4 where CTE scans were introduced.
author: Tom Lane <tgl@sss.pgh.pa.us> 2012-08-15 19:01:39 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2012-08-15 19:01:39 -0400
commit: dda44070cc9c9bb3d0fedcca4e9610ea7ec8ee77 (patch)
tree: 2812d8834225a8c38894b06564a076606ea7369e /src
parent: 1a36a773a6352de81d8b207f87b7259fbd756b16 (diff)
download: postgresql-dda44070cc9c9bb3d0fedcca4e9610ea7ec8ee77.tar.gz
postgresql-dda44070cc9c9bb3d0fedcca4e9610ea7ec8ee77.zip
3 files changed, 151 insertions, 17 deletions
diff --git a/src/backend/executor/nodeCtescan.c b/src/backend/executor/nodeCtescan.c
index 81469c41d7c..3ecff83bc1e 100644
--- a/src/backend/executor/nodeCtescan.c
+++ b/src/backend/executor/nodeCtescan.c
@@ -196,7 +196,7 @@ ExecInitCteScan(CteScan *node, EState *estate, int eflags)
 	 * The Param slot associated with the CTE query is used to hold a pointer
 	 * to the CteState of the first CteScan node that initializes for this
 	 * CTE.  This node will be the one that holds the shared state for all the
-	 * CTEs.
+	 * CTEs, particularly the shared tuplestore.
 	 */
 	prmdata = &(estate->es_param_exec_vals[node->cteParam]);
 	Assert(prmdata->execPlan == NULL);
@@ -295,7 +295,10 @@ ExecEndCteScan(CteScanState *node)
 	 * If I am the leader, free the tuplestore.
 	 */
 	if (node->leader == node)
+	{
 		tuplestore_end(node->cte_table);
+		node->cte_table = NULL;
+	}
 }
 
 /* ----------------------------------------------------------------
@@ -312,26 +315,26 @@ ExecCteScanReScan(CteScanState *node, ExprContext *exprCtxt)
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
 	node->ss.ps.ps_TupFromTlist = false;
 
-	if (node->leader == node)
+	/*
+	 * Clear the tuplestore if a new scan of the underlying CTE is required.
+	 * This implicitly resets all the tuplestore's read pointers.  Note that
+	 * multiple CTE nodes might redundantly clear the tuplestore; that's OK,
+	 * and not unduly expensive.  We'll stop taking this path as soon as
+	 * somebody has attempted to read something from the underlying CTE
+	 * (thereby causing its chgParam to be cleared).
+	 */
+	if (node->leader->cteplanstate->chgParam != NULL)
 	{
-		/*
-		 * The leader is responsible for clearing the tuplestore if a new scan
-		 * of the underlying CTE is required.
-		 */
-		if (node->cteplanstate->chgParam != NULL)
-		{
-			tuplestore_clear(tuplestorestate);
-			node->eof_cte = false;
-		}
-		else
-		{
-			tuplestore_select_read_pointer(tuplestorestate, node->readptr);
-			tuplestore_rescan(tuplestorestate);
-		}
+		tuplestore_clear(tuplestorestate);
+		node->leader->eof_cte = false;
 	}
 	else
 	{
-		/* Not leader, so just rewind my own pointer */
+		/*
+		 * Else, just rewind my own pointer.  Either the underlying CTE
+		 * doesn't need a rescan (and we can re-read what's in the tuplestore
+		 * now), or somebody else already took care of it.
+		 */
 		tuplestore_select_read_pointer(tuplestorestate, node->readptr);
 		tuplestore_rescan(tuplestorestate);
 	}
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 7db4d7cac75..65921393a6a 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -1077,3 +1077,78 @@ SELECT * FROM outermost;
 ERROR:  recursive reference to query "outermost" must not appear within a subquery
 LINE 2:   WITH innermost as (SELECT 2 FROM outermost) 
                                            ^
+--
+-- Test CTEs read in non-initialization orders
+--
+WITH RECURSIVE
+  tab(id_key,link) AS (VALUES (1,17), (2,17), (3,17), (4,17), (6,17), (5,17)),
+  iter (id_key, row_type, link) AS (
+      SELECT 0, 'base', 17
+    UNION ALL (
+      WITH remaining(id_key, row_type, link, min) AS (
+        SELECT tab.id_key, 'true'::text, iter.link, MIN(tab.id_key) OVER ()
+        FROM tab INNER JOIN iter USING (link)
+        WHERE tab.id_key > iter.id_key
+      ),
+      first_remaining AS (
+        SELECT id_key, row_type, link
+        FROM remaining
+        WHERE id_key=min
+      ),
+      effect AS (
+        SELECT tab.id_key, 'new'::text, tab.link
+        FROM first_remaining e INNER JOIN tab ON e.id_key=tab.id_key
+        WHERE e.row_type = 'false'
+      )
+      SELECT * FROM first_remaining
+      UNION ALL SELECT * FROM effect
+    )
+  )
+SELECT * FROM iter;
+ id_key | row_type | link 
+--------+----------+------
+      0 | base     |   17
+      1 | true     |   17
+      2 | true     |   17
+      3 | true     |   17
+      4 | true     |   17
+      5 | true     |   17
+      6 | true     |   17
+(7 rows)
+
+WITH RECURSIVE
+  tab(id_key,link) AS (VALUES (1,17), (2,17), (3,17), (4,17), (6,17), (5,17)),
+  iter (id_key, row_type, link) AS (
+      SELECT 0, 'base', 17
+    UNION (
+      WITH remaining(id_key, row_type, link, min) AS (
+        SELECT tab.id_key, 'true'::text, iter.link, MIN(tab.id_key) OVER ()
+        FROM tab INNER JOIN iter USING (link)
+        WHERE tab.id_key > iter.id_key
+      ),
+      first_remaining AS (
+        SELECT id_key, row_type, link
+        FROM remaining
+        WHERE id_key=min
+      ),
+      effect AS (
+        SELECT tab.id_key, 'new'::text, tab.link
+        FROM first_remaining e INNER JOIN tab ON e.id_key=tab.id_key
+        WHERE e.row_type = 'false'
+      )
+      SELECT * FROM first_remaining
+      UNION ALL SELECT * FROM effect
+    )
+  )
+SELECT * FROM iter;
+ id_key | row_type | link 
+--------+----------+------
+      0 | base     |   17
+      1 | true     |   17
+      2 | true     |   17
+      3 | true     |   17
+      4 | true     |   17
+      5 | true     |   17
+      6 | true     |   17
+(7 rows)
+
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index e74ebd76888..697eaa71f40 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -535,3 +535,59 @@ WITH RECURSIVE outermost(x) AS (
     UNION SELECT * from outermost
 )
 SELECT * FROM outermost;
+
+--
+-- Test CTEs read in non-initialization orders
+--
+
+WITH RECURSIVE
+  tab(id_key,link) AS (VALUES (1,17), (2,17), (3,17), (4,17), (6,17), (5,17)),
+  iter (id_key, row_type, link) AS (
+      SELECT 0, 'base', 17
+    UNION ALL (
+      WITH remaining(id_key, row_type, link, min) AS (
+        SELECT tab.id_key, 'true'::text, iter.link, MIN(tab.id_key) OVER ()
+        FROM tab INNER JOIN iter USING (link)
+        WHERE tab.id_key > iter.id_key
+      ),
+      first_remaining AS (
+        SELECT id_key, row_type, link
+        FROM remaining
+        WHERE id_key=min
+      ),
+      effect AS (
+        SELECT tab.id_key, 'new'::text, tab.link
+        FROM first_remaining e INNER JOIN tab ON e.id_key=tab.id_key
+        WHERE e.row_type = 'false'
+      )
+      SELECT * FROM first_remaining
+      UNION ALL SELECT * FROM effect
+    )
+  )
+SELECT * FROM iter;
+
+WITH RECURSIVE
+  tab(id_key,link) AS (VALUES (1,17), (2,17), (3,17), (4,17), (6,17), (5,17)),
+  iter (id_key, row_type, link) AS (
+      SELECT 0, 'base', 17
+    UNION (
+      WITH remaining(id_key, row_type, link, min) AS (
+        SELECT tab.id_key, 'true'::text, iter.link, MIN(tab.id_key) OVER ()
+        FROM tab INNER JOIN iter USING (link)
+        WHERE tab.id_key > iter.id_key
+      ),
+      first_remaining AS (
+        SELECT id_key, row_type, link
+        FROM remaining
+        WHERE id_key=min
+      ),
+      effect AS (
+        SELECT tab.id_key, 'new'::text, tab.link
+        FROM first_remaining e INNER JOIN tab ON e.id_key=tab.id_key
+        WHERE e.row_type = 'false'
+      )
+      SELECT * FROM first_remaining
+      UNION ALL SELECT * FROM effect
+    )
+  )
+SELECT * FROM iter;
author	Tom Lane <tgl@sss.pgh.pa.us>	2012-08-15 19:01:39 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2012-08-15 19:01:39 -0400
commit	dda44070cc9c9bb3d0fedcca4e9610ea7ec8ee77 (patch)
tree	2812d8834225a8c38894b06564a076606ea7369e /src
parent	1a36a773a6352de81d8b207f87b7259fbd756b16 (diff)
download	postgresql-dda44070cc9c9bb3d0fedcca4e9610ea7ec8ee77.tar.gz postgresql-dda44070cc9c9bb3d0fedcca4e9610ea7ec8ee77.zip