aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2008-02-02 22:26:23 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2008-02-02 22:26:23 +0000
commited5858dff87b15e157398228d57b422bc705b220 (patch)
tree9050a9191157ef71aec33debb2b24f8b30557ba1
parent6909d80982a6d11d9d6c9e49a16c414f0aa98c2a (diff)
downloadpostgresql-ed5858dff87b15e157398228d57b422bc705b220.tar.gz
postgresql-ed5858dff87b15e157398228d57b422bc705b220.zip
Fix WaitOnLock() to ensure that the process's "waiting" flag is reset after
erroring out of a wait. We can use a PG_TRY block for this, but add a comment explaining why it'd be a bad idea to use it for any other state cleanup. Back-patch to 8.2. Prior releases had the same issue, but only with respect to the process title, which is likely to get reset almost immediately anyway after the transaction aborts, so it seems not worth changing them. In 8.2 and HEAD, the pg_stat_activity "waiting" flag could remain set incorrectly for a long time. Per report from Gurjeet Singh.
-rw-r--r--src/backend/storage/lmgr/lock.c69
1 files changed, 47 insertions, 22 deletions
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 939528e7548..a8d2ad01d9f 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.174 2006/10/04 00:29:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.174.2.1 2008/02/02 22:26:23 tgl Exp $
*
* NOTES
* A lock table is a shared memory hash table. When
@@ -1102,9 +1102,7 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
{
LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
LockMethod lockMethodTable = LockMethods[lockmethodid];
- const char *old_status;
- char *new_status = NULL;
- int len;
+ char * volatile new_status = NULL;
LOCK_PRINT("WaitOnLock: sleeping on lock",
locallock->lock, locallock->tag.mode);
@@ -1112,6 +1110,9 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
/* Report change to waiting status */
if (update_process_title)
{
+ const char *old_status;
+ int len;
+
old_status = get_ps_display(&len);
new_status = (char *) palloc(len + 8 + 1);
memcpy(new_status, old_status, len);
@@ -1132,38 +1133,62 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
* that a cancel/die interrupt will interrupt ProcSleep after someone else
* grants us the lock, but before we've noticed it. Hence, after granting,
* the locktable state must fully reflect the fact that we own the lock;
- * we can't do additional work on return. Contrariwise, if we fail, any
- * cleanup must happen in xact abort processing, not here, to ensure it
- * will also happen in the cancel/die case.
+ * we can't do additional work on return.
+ *
+ * We can and do use a PG_TRY block to try to clean up after failure,
+ * but this still has a major limitation: elog(FATAL) can occur while
+ * waiting (eg, a "die" interrupt), and then control won't come back here.
+ * So all cleanup of essential state should happen in LockWaitCancel,
+ * not here. We can use PG_TRY to clear the "waiting" status flags,
+ * since doing that is unimportant if the process exits.
*/
+ PG_TRY();
+ {
+ if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
+ {
+ /*
+ * We failed as a result of a deadlock, see CheckDeadLock().
+ * Quit now.
+ */
+ awaitedLock = NULL;
+ LOCK_PRINT("WaitOnLock: aborting on lock",
+ locallock->lock, locallock->tag.mode);
+ LWLockRelease(LockHashPartitionLock(locallock->hashcode));
- if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
+ /*
+ * Now that we aren't holding the partition lock, we can give an
+ * error report including details about the detected deadlock.
+ */
+ DeadLockReport();
+ /* not reached */
+ }
+ }
+ PG_CATCH();
{
- /*
- * We failed as a result of a deadlock, see CheckDeadLock(). Quit now.
- */
- awaitedLock = NULL;
- LOCK_PRINT("WaitOnLock: aborting on lock",
- locallock->lock, locallock->tag.mode);
- LWLockRelease(LockHashPartitionLock(locallock->hashcode));
+ /* In this path, awaitedLock remains set until LockWaitCancel */
- /*
- * Now that we aren't holding the partition lock, we can give an error
- * report including details about the detected deadlock.
- */
- DeadLockReport();
- /* not reached */
+ /* Report change to non-waiting status */
+ pgstat_report_waiting(false);
+ if (update_process_title)
+ {
+ set_ps_display(new_status, false);
+ pfree(new_status);
+ }
+
+ /* and propagate the error */
+ PG_RE_THROW();
}
+ PG_END_TRY();
awaitedLock = NULL;
/* Report change to non-waiting status */
+ pgstat_report_waiting(false);
if (update_process_title)
{
set_ps_display(new_status, false);
pfree(new_status);
}
- pgstat_report_waiting(false);
LOCK_PRINT("WaitOnLock: wakeup on lock",
locallock->lock, locallock->tag.mode);