aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2018-04-07 16:59:14 +0300
committerTeodor Sigaev <teodor@sigaev.ru>2018-04-07 16:59:14 +0300
commitb508a56f2f3a2d850e75a14661943d6b4dde8274 (patch)
tree1c8e5b683669f19cf27606f472628fb162421bdd /src/backend
parent971d7ddbe19ad9525457e65d01b8b6504ab57cc4 (diff)
downloadpostgresql-b508a56f2f3a2d850e75a14661943d6b4dde8274.tar.gz
postgresql-b508a56f2f3a2d850e75a14661943d6b4dde8274.zip
Predicate locking in hash indexes.
Hash index searches acquire predicate locks on the primary page of a bucket. It acquires a lock on both the old and new buckets for scans that happen concurrently with page splits. During a bucket split, a predicate lock is copied from the primary page of an old bucket to the primary page of a new bucket. Author: Shubham Barai, Amit Kapila Reviewed by: Amit Kapila, Alexander Korotkov, Thomas Munro Discussion: https://www.postgresql.org/message-id/flat/CALxAEPvNsM2GTiXdRgaaZ1Pjd1bs+sxfFsf7Ytr+iq+5JJoYXA@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/hash/hash.c2
-rw-r--r--src/backend/access/hash/hashinsert.c3
-rw-r--r--src/backend/access/hash/hashpage.c6
-rw-r--r--src/backend/access/hash/hashsearch.c3
-rw-r--r--src/backend/storage/lmgr/README-SSI7
5 files changed, 20 insertions, 1 deletion
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index e337439adad..4f2ea7955f9 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -68,7 +68,7 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->amsearchnulls = false;
amroutine->amstorage = false;
amroutine->amclusterable = false;
- amroutine->ampredlocks = false;
+ amroutine->ampredlocks = true;
amroutine->amcanparallel = false;
amroutine->amkeytype = INT4OID;
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index f121286b8ca..3eb722ce266 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -22,6 +22,7 @@
#include "utils/rel.h"
#include "storage/lwlock.h"
#include "storage/buf_internals.h"
+#include "storage/predicate.h"
static void _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
RelFileNode hnode);
@@ -88,6 +89,8 @@ restart_insert:
&usedmetap);
Assert(usedmetap != NULL);
+ CheckForSerializableConflictIn(rel, NULL, buf);
+
/* remember the primary bucket buffer to release the pin on it at end. */
bucket_buf = buf;
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 3859e3bd838..3ec29a53568 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -33,6 +33,7 @@
#include "miscadmin.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
+#include "storage/predicate.h"
static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock,
@@ -1107,6 +1108,11 @@ _hash_splitbucket(Relation rel,
npage = BufferGetPage(nbuf);
nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
+ /* Copy the predicate locks from old bucket to new bucket. */
+ PredicateLockPageSplit(rel,
+ BufferGetBlockNumber(bucket_obuf),
+ BufferGetBlockNumber(bucket_nbuf));
+
/*
* Partition the tuples in the old bucket between the old bucket and the
* new bucket, advancing along the old bucket's overflow bucket chain and
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index c692c5b32d1..650041db0a5 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -19,6 +19,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/rel.h"
+#include "storage/predicate.h"
static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
ScanDirection dir);
@@ -171,6 +172,7 @@ _hash_readnext(IndexScanDesc scan,
Assert(BufferIsValid(*bufp));
LockBuffer(*bufp, BUFFER_LOCK_SHARE);
+ PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);
/*
* setting hashso_buc_split to true indicates that we are scanning
@@ -347,6 +349,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
so->hashso_sk_hash = hashkey;
buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
+ PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
page = BufferGetPage(buf);
TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
diff --git a/src/backend/storage/lmgr/README-SSI b/src/backend/storage/lmgr/README-SSI
index 9e98af23c83..f2b099d1c9e 100644
--- a/src/backend/storage/lmgr/README-SSI
+++ b/src/backend/storage/lmgr/README-SSI
@@ -389,6 +389,13 @@ relation is required. Fast update postpones the insertion of tuples into index
structure by temporarily storing them into pending list. That makes us unable
to detect r-w conflicts using page-level locks.
+ * Hash index searches acquire predicate locks on the primary
+page of a bucket. They acquire locks on both the old and new buckets
+for scans that happen concurrently with page splits. During a bucket
+split, a predicate lock is copied from the primary page of the old
+bucket to the primary page of the new bucket.
+
+
* The effects of page splits, overflows, consolidations, and
removals must be carefully reviewed to ensure that predicate locks
aren't "lost" during those operations, or kept with pages which could