aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2019-07-29 18:49:04 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2019-07-29 18:49:04 -0400
commitd933816c04c64326419bbcd89dc17a4e310f950b (patch)
tree107717b867ddff9008f0dea06c10478b8ab41dfc
parent28bbf7a81b3a30504cc7dfdbd76b410d1f127b8e (diff)
downloadpostgresql-d933816c04c64326419bbcd89dc17a4e310f950b.tar.gz
postgresql-d933816c04c64326419bbcd89dc17a4e310f950b.zip
Fix busted logic for parallel lock grouping in TopoSort().
A "break" statement erroneously left behind by commit a1c1af2a1 caused TopoSort to do the wrong thing if a lock's wait list contained multiple members of the same locking group. Because parallel workers don't normally need any locks not already taken by their leader, this is very hard --- maybe impossible --- to hit in production. Still, if it did happen, the queries involved in an otherwise-resolvable deadlock would block until canceled. In addition to removing the bogus "break", add an Assert showing that the conflicting uses of the beforeConstraints[] array (for both counts and flags) don't overlap, and add some commentary explaining why not; because it's not obvious without explanation, IMHO. Original report and patch from Rui Hai Jiang; additional assert and commentary by me. Back-patch to 9.6 where the bug came in. Discussion: https://postgr.es/m/CAEri+mLd3bpHLyW+a9pSe1y=aEkeuJpwBSwvo-+m4n7-ceRmXw@mail.gmail.com
-rw-r--r--src/backend/storage/lmgr/deadlock.c8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index 9abc9d778f6..f17ef9377a6 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -922,6 +922,12 @@ TopoSort(LOCK *lock,
* in the same lock group on the queue, set their number of
* beforeConstraints to -1 to indicate that they should be emitted
* with their groupmates rather than considered separately.
+ *
+ * In this loop and the similar one just below, it's critical that we
+ * consistently select the same representative member of any one lock
+ * group, so that all the constraints are associated with the same
+ * proc, and the -1's are only associated with not-representative
+ * members. We select the last one in the topoProcs array.
*/
proc = constraints[i].waiter;
Assert(proc != NULL);
@@ -940,7 +946,6 @@ TopoSort(LOCK *lock,
Assert(beforeConstraints[j] <= 0);
beforeConstraints[j] = -1;
}
- break;
}
}
@@ -977,6 +982,7 @@ TopoSort(LOCK *lock,
if (kk < 0)
continue;
+ Assert(beforeConstraints[jj] >= 0);
beforeConstraints[jj]++; /* waiter must come before */
/* add this constraint to list of after-constraints for blocker */
constraints[i].pred = jj;