| author | Tom Lane <tgl@sss.pgh.pa.us> | 2015-05-12 14:10:10 -0400 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2015-05-12 14:10:17 -0400 |
| commit | afb9249d06f47d7a6d4a89fea0c3625fe43c5a5d | |
| tree | e7f62e6cb2b6baefa9489536966229e4af912695 /src/backend/executor | |
| parent | aa4a0b9571232f44e4b8d9effca3c540e657cebb | |
Add support for doing late row locking in FDWs.
Previously, FDWs could only do "early row locking", that is, lock a row as
soon as it's fetched, even though local restriction/join conditions might
discard the row later. This patch adds callbacks that allow FDWs to do
late locking in the same way that it's done for regular tables.
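For orientation, the core of the new API is a single optional callback in
FdwRoutine. The sketch below restates its shape as inferred from the executor
call sites in the diff that follows; consult foreign/fdwapi.h for the
authoritative declaration:

```c
/*
 * Late row locking callback (sketch inferred from the call sites in the
 * diff below).  Given the row identifier previously returned in the
 * "ctid" junk column, the FDW must re-fetch the row, lock it according
 * to erm->strength and erm->waitPolicy, and return a palloc'd copy of
 * it.  Returning NULL means the row could not be locked (for example
 * under a SKIP LOCKED wait policy); *updated should be set to true if
 * the row returned is a newer version than the one previously obtained.
 */
typedef HeapTuple (*RefetchForeignRow_function) (EState *estate,
                                                 ExecRowMark *erm,
                                                 Datum rowid,
                                                 bool *updated);
```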
To make use of this feature, an FDW must support the "ctid" column as a
unique row identifier. Currently, since ctid has to be of type TID,
the feature is of limited use, though in principle it could be used by
postgres_fdw. We may eventually allow FDWs to specify another data type
for ctid, which would make it possible for more FDWs to use this feature.
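The decision to use late locking is made at plan time. Below is a hedged
sketch of the companion planner-side callback; it belongs to the same API but
does not appear in this executor-limited diff, so treat the details here as an
assumption:

```c
/*
 * Planner-side callback sketch (assumed; not shown in this diff).  The
 * FDW reports which rowmark strategy to use for a foreign relation.
 * Returning a true locking mark such as ROW_MARK_EXCLUSIVE opts into
 * late locking via RefetchForeignRow; returning ROW_MARK_COPY keeps the
 * old early-locking behavior.
 */
static RowMarkType
myGetForeignRowMarkType(RangeTblEntry *rte, LockClauseStrength strength)
{
    return (strength == LCS_NONE) ? ROW_MARK_COPY : ROW_MARK_EXCLUSIVE;
}
```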
This commit does not modify postgres_fdw to use late locking. We've
tested some prototype code for that, but it's not in committable shape,
and besides it's quite unclear whether it actually makes sense to do late
locking against a remote server. The extra round trips required are likely
to outweigh any benefit from improved concurrency.
Etsuro Fujita, reviewed by Ashutosh Bapat, and hacked up a lot by me
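To make the contract concrete, here is a minimal, hypothetical FDW fragment
wiring up the callback. Everything named my_* is an invented placeholder
(this commit deliberately does not add such code to postgres_fdw); only the
FdwRoutine field and the callback signature come from the patch itself:

```c
#include "postgres.h"
#include "fmgr.h"
#include "foreign/fdwapi.h"

/*
 * Invented helper: one remote round trip that re-fetches the row named
 * by tid, locks it per the rowmark's strength/waitPolicy, returns a
 * palloc'd copy (or NULL if the row couldn't be locked), and sets
 * *updated if a newer version came back.
 */
static HeapTuple my_remote_lock_fetch(ExecRowMark *erm, ItemPointer tid,
                                      bool *updated);

/*
 * Hypothetical RefetchForeignRow implementation.  The rowid is whatever
 * Datum the FDW exposed as its "ctid" junk column; per the restriction
 * noted in the commit message it currently must be a genuine TID.
 */
static HeapTuple
myRefetchForeignRow(EState *estate, ExecRowMark *erm,
                    Datum rowid, bool *updated)
{
    ItemPointer tid = (ItemPointer) DatumGetPointer(rowid);

    /* Returning NULL makes nodeLockRows skip the row (e.g. SKIP LOCKED) */
    return my_remote_lock_fetch(erm, tid, updated);
}

PG_FUNCTION_INFO_V1(my_fdw_handler);

Datum
my_fdw_handler(PG_FUNCTION_ARGS)
{
    FdwRoutine *routine = makeNode(FdwRoutine);

    /* ... scan, modify, and other callbacks elided ... */
    routine->RefetchForeignRow = myRefetchForeignRow;

    PG_RETURN_POINTER(routine);
}
```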
Diffstat (limited to 'src/backend/executor')
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | src/backend/executor/execMain.c | 79 |
| -rw-r--r-- | src/backend/executor/execUtils.c | 17 |
| -rw-r--r-- | src/backend/executor/nodeLockRows.c | 133 |
| -rw-r--r-- | src/backend/executor/nodeModifyTable.c | 2 |
4 files changed, 156 insertions, 75 deletions
```diff
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 0dee9491788..43d3c44c827 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -898,8 +898,11 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 		erm->prti = rc->prti;
 		erm->rowmarkId = rc->rowmarkId;
 		erm->markType = rc->markType;
+		erm->strength = rc->strength;
 		erm->waitPolicy = rc->waitPolicy;
+		erm->ermActive = false;
 		ItemPointerSetInvalid(&(erm->curCtid));
+		erm->ermExtra = NULL;
 		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
 	}
 
@@ -1143,6 +1146,8 @@ CheckValidResultRel(Relation resultRel, CmdType operation)
 static void
 CheckValidRowMarkRel(Relation rel, RowMarkType markType)
 {
+	FdwRoutine *fdwroutine;
+
 	switch (rel->rd_rel->relkind)
 	{
 		case RELKIND_RELATION:
@@ -1178,11 +1183,13 @@ CheckValidRowMarkRel(Relation rel, RowMarkType markType)
 							RelationGetRelationName(rel))));
 			break;
 		case RELKIND_FOREIGN_TABLE:
-			/* Should not get here; planner should have used ROW_MARK_COPY */
-			ereport(ERROR,
-					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-					 errmsg("cannot lock rows in foreign table \"%s\"",
-							RelationGetRelationName(rel))));
+			/* Okay only if the FDW supports it */
+			fdwroutine = GetFdwRoutineForRelation(rel, false);
+			if (fdwroutine->RefetchForeignRow == NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot lock rows in foreign table \"%s\"",
+								RelationGetRelationName(rel))));
 			break;
 		default:
 			ereport(ERROR,
@@ -2005,9 +2012,11 @@ ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
 
 /*
  * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
+ *
+ * If no such struct, either return NULL or throw error depending on missing_ok
  */
 ExecRowMark *
-ExecFindRowMark(EState *estate, Index rti)
+ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
 {
 	ListCell   *lc;
 
@@ -2018,8 +2027,9 @@ ExecFindRowMark(EState *estate, Index rti)
 		if (erm->rti == rti)
 			return erm;
 	}
-	elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
-	return NULL;				/* keep compiler quiet */
+	if (!missing_ok)
+		elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
+	return NULL;
 }
 
 /*
@@ -2530,7 +2540,7 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
 
 		if (erm->markType == ROW_MARK_REFERENCE)
 		{
-			Buffer		buffer;
+			HeapTuple	copyTuple;
 
 			Assert(erm->relation != NULL);
 
@@ -2541,17 +2551,50 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
 			/* non-locked rels could be on the inside of outer joins */
 			if (isNull)
 				continue;
-			tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
 
-			/* okay, fetch the tuple */
-			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
-							false, NULL))
-				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
+			/* fetch requests on foreign tables must be passed to their FDW */
+			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+			{
+				FdwRoutine *fdwroutine;
+				bool		updated = false;
 
-			/* successful, copy and store tuple */
-			EvalPlanQualSetTuple(epqstate, erm->rti,
-								 heap_copytuple(&tuple));
-			ReleaseBuffer(buffer);
+				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+				/* this should have been checked already, but let's be safe */
+				if (fdwroutine->RefetchForeignRow == NULL)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot lock rows in foreign table \"%s\"",
+									RelationGetRelationName(erm->relation))));
+				copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
+														  erm,
+														  datum,
+														  &updated);
+				if (copyTuple == NULL)
+					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
+
+				/*
+				 * Ideally we'd insist on updated == false here, but that
+				 * assumes that FDWs can track that exactly, which they might
+				 * not be able to.  So just ignore the flag.
+				 */
+			}
+			else
+			{
+				/* ordinary table, fetch the tuple */
+				Buffer		buffer;
+
+				tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
+				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
+								false, NULL))
+					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
+
+				/* successful, copy tuple */
+				copyTuple = heap_copytuple(&tuple);
+				ReleaseBuffer(buffer);
+			}
+
+			/* store tuple */
+			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
 		}
 		else
 		{
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 88ba16bc6da..0da8e53e816 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -805,20 +805,11 @@ ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
 		lockmode = NoLock;
 	else
 	{
-		ListCell   *l;
+		/* Keep this check in sync with InitPlan! */
+		ExecRowMark *erm = ExecFindRowMark(estate, scanrelid, true);
 
-		foreach(l, estate->es_rowMarks)
-		{
-			ExecRowMark *erm = lfirst(l);
-
-			/* Keep this check in sync with InitPlan! */
-			if (erm->rti == scanrelid &&
-				erm->relation != NULL)
-			{
-				lockmode = NoLock;
-				break;
-			}
-		}
+		if (erm != NULL && erm->relation != NULL)
+			lockmode = NoLock;
 	}
 
 	/* Open the relation and acquire lock as needed */
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 5ae106c06ad..7bcf99f4889 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -25,6 +25,7 @@
 #include "access/xact.h"
 #include "executor/executor.h"
 #include "executor/nodeLockRows.h"
+#include "foreign/fdwapi.h"
 #include "storage/bufmgr.h"
 #include "utils/rel.h"
 #include "utils/tqual.h"
@@ -40,7 +41,7 @@ ExecLockRows(LockRowsState *node)
 	TupleTableSlot *slot;
 	EState	   *estate;
 	PlanState  *outerPlan;
-	bool		epq_started;
+	bool		epq_needed;
 	ListCell   *lc;
 
 	/*
@@ -58,15 +59,18 @@ lnext:
 	if (TupIsNull(slot))
 		return NULL;
 
+	/* We don't need EvalPlanQual unless we get updated tuple version(s) */
+	epq_needed = false;
+
 	/*
 	 * Attempt to lock the source tuple(s).  (Note we only have locking
 	 * rowmarks in lr_arowMarks.)
 	 */
-	epq_started = false;
 	foreach(lc, node->lr_arowMarks)
 	{
 		ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
 		ExecRowMark *erm = aerm->rowmark;
+		HeapTuple  *testTuple;
 		Datum		datum;
 		bool		isNull;
 		HeapTupleData tuple;
@@ -77,8 +81,10 @@ lnext:
 		HeapTuple	copyTuple;
 
 		/* clear any leftover test tuple for this rel */
-		if (node->lr_epqstate.estate != NULL)
-			EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, NULL);
+		testTuple = &(node->lr_curtuples[erm->rti - 1]);
+		if (*testTuple != NULL)
+			heap_freetuple(*testTuple);
+		*testTuple = NULL;
 
 		/* if child rel, must check whether it produced this row */
 		if (erm->rti != erm->prti)
@@ -97,10 +103,12 @@ lnext:
 			if (tableoid != erm->relid)
 			{
 				/* this child is inactive right now */
+				erm->ermActive = false;
 				ItemPointerSetInvalid(&(erm->curCtid));
 				continue;
 			}
 		}
+		erm->ermActive = true;
 
 		/* fetch the tuple's ctid */
 		datum = ExecGetJunkAttribute(slot,
@@ -109,9 +117,45 @@ lnext:
 		/* shouldn't ever get a null result... */
 		if (isNull)
 			elog(ERROR, "ctid is NULL");
-		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
+
+		/* requests for foreign tables must be passed to their FDW */
+		if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+		{
+			FdwRoutine *fdwroutine;
+			bool		updated = false;
+
+			fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+			/* this should have been checked already, but let's be safe */
+			if (fdwroutine->RefetchForeignRow == NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("cannot lock rows in foreign table \"%s\"",
+								RelationGetRelationName(erm->relation))));
+			copyTuple = fdwroutine->RefetchForeignRow(estate,
+													  erm,
+													  datum,
+													  &updated);
+			if (copyTuple == NULL)
+			{
+				/* couldn't get the lock, so skip this row */
+				goto lnext;
+			}
+
+			/* save locked tuple for possible EvalPlanQual testing below */
+			*testTuple = copyTuple;
+
+			/*
+			 * if FDW says tuple was updated before getting locked, we need to
+			 * perform EPQ testing to see if quals are still satisfied
+			 */
+			if (updated)
+				epq_needed = true;
+
+			continue;
+		}
 
 		/* okay, try to lock the tuple */
+		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
 		switch (erm->markType)
 		{
 			case ROW_MARK_EXCLUSIVE:
@@ -191,40 +235,11 @@ lnext:
 				/* remember the actually locked tuple's TID */
 				tuple.t_self = copyTuple->t_self;
 
-				/*
-				 * Need to run a recheck subquery.  Initialize EPQ state if we
-				 * didn't do so already.
-				 */
-				if (!epq_started)
-				{
-					ListCell   *lc2;
-
-					EvalPlanQualBegin(&node->lr_epqstate, estate);
-
-					/*
-					 * Ensure that rels with already-visited rowmarks are told
-					 * not to return tuples during the first EPQ test.  We can
-					 * exit this loop once it reaches the current rowmark;
-					 * rels appearing later in the list will be set up
-					 * correctly by the EvalPlanQualSetTuple call at the top
-					 * of the loop.
-					 */
-					foreach(lc2, node->lr_arowMarks)
-					{
-						ExecAuxRowMark *aerm2 = (ExecAuxRowMark *) lfirst(lc2);
-
-						if (lc2 == lc)
-							break;
-						EvalPlanQualSetTuple(&node->lr_epqstate,
-											 aerm2->rowmark->rti,
-											 NULL);
-					}
-
-					epq_started = true;
-				}
-
-				/* Store target tuple for relation's scan node */
-				EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, copyTuple);
+				/* Save locked tuple for EvalPlanQual testing below */
+				*testTuple = copyTuple;
+
+				/* Remember we need to do EPQ testing */
+				epq_needed = true;
 
 				/* Continue loop until we have all target tuples */
 				break;
@@ -237,17 +252,35 @@ lnext:
 					 test);
 		}
 
-		/* Remember locked tuple's TID for WHERE CURRENT OF */
+		/* Remember locked tuple's TID for EPQ testing and WHERE CURRENT OF */
 		erm->curCtid = tuple.t_self;
 	}
 
 	/*
	 * If we need to do EvalPlanQual testing, do so.
 	 */
-	if (epq_started)
+	if (epq_needed)
 	{
+		int			i;
+
+		/* Initialize EPQ machinery */
+		EvalPlanQualBegin(&node->lr_epqstate, estate);
+
+		/*
+		 * Transfer already-fetched tuples into the EPQ state, and make sure
+		 * its test tuples for other tables are reset to NULL.
+		 */
+		for (i = 0; i < node->lr_ntables; i++)
+		{
+			EvalPlanQualSetTuple(&node->lr_epqstate,
+								 i + 1,
+								 node->lr_curtuples[i]);
+			/* freeing this tuple is now the responsibility of EPQ */
+			node->lr_curtuples[i] = NULL;
+		}
+
 		/*
-		 * First, fetch a copy of any rows that were successfully locked
+		 * Next, fetch a copy of any rows that were successfully locked
 		 * without any update having occurred.  (We do this in a separate pass
 		 * so as to avoid overhead in the common case where there are no
 		 * concurrent updates.)
@@ -260,7 +293,7 @@ lnext:
 			Buffer		buffer;
 
 			/* ignore non-active child tables */
-			if (!ItemPointerIsValid(&(erm->curCtid)))
+			if (!erm->ermActive)
 			{
 				Assert(erm->rti != erm->prti);	/* check it's child table */
 				continue;
 			}
@@ -269,6 +302,10 @@ lnext:
 			if (EvalPlanQualGetTuple(&node->lr_epqstate, erm->rti) != NULL)
 				continue;		/* it was updated and fetched above */
 
+			/* foreign tables should have been fetched above */
+			Assert(erm->relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE);
+			Assert(ItemPointerIsValid(&(erm->curCtid)));
+
 			/* okay, fetch the tuple */
 			tuple.t_self = erm->curCtid;
 			if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
@@ -352,6 +389,13 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 	lrstate->ps.ps_ProjInfo = NULL;
 
 	/*
+	 * Create workspace in which we can remember per-RTE locked tuples
+	 */
+	lrstate->lr_ntables = list_length(estate->es_range_table);
+	lrstate->lr_curtuples = (HeapTuple *)
+		palloc0(lrstate->lr_ntables * sizeof(HeapTuple));
+
+	/*
 	 * Locate the ExecRowMark(s) that this node is responsible for, and
 	 * construct ExecAuxRowMarks for them.  (InitPlan should already have
 	 * built the global list of ExecRowMarks.)
@@ -370,8 +414,11 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
 		if (rc->isParent)
 			continue;
 
+		/* safety check on size of lr_curtuples array */
+		Assert(rc->rti > 0 && rc->rti <= lrstate->lr_ntables);
+
 		/* find ExecRowMark and build ExecAuxRowMark */
-		erm = ExecFindRowMark(estate, rc->rti);
+		erm = ExecFindRowMark(estate, rc->rti, false);
 		aerm = ExecBuildAuxRowMark(erm, outerPlan->targetlist);
 
 		/*
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 34435c7e50a..aec41510946 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1720,7 +1720,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 			continue;
 
 		/* find ExecRowMark (same for all subplans) */
-		erm = ExecFindRowMark(estate, rc->rti);
+		erm = ExecFindRowMark(estate, rc->rti, false);
 
 		/* build ExecAuxRowMark for each subplan */
 		for (i = 0; i < nplans; i++)
```