about | summary | refs | log | tree | commit | diff
path: root/src/include
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-04-26 23:25:09 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-04-26 23:25:09 +0000
commit: ca27e5ec4cc7439969efcebfc1c2dc5389f272fa (patch)
tree: 255e1257f4c62fbb9ecbd5d628f8c1fecbf5bc32 /src/include
parent: a99381619efd1988c0ad0a7119a9ff4a11cef9cd (diff)
download: postgresql-ca27e5ec4cc7439969efcebfc1c2dc5389f272fa.tar.gz
postgresql-ca27e5ec4cc7439969efcebfc1c2dc5389f272fa.zip
Fix dynahash.c to suppress hash bucket splits while a hash_seq_search() scan is in progress on the same hashtable.

This seems the least invasive way to fix the recently-recognized problem that a split could cause the scan to visit entries twice or (with much lower probability) miss them entirely. The only field-reported problem caused by this is the "failed to re-find shared lock object" PANIC in COMMIT PREPARED reported by Michel Dorochevsky, which was caused by multiply visited entries. However, it seems certain that mdsync() is vulnerable to missing required fsyncs due to missed entries, and I am fearful that RelationCacheInitializePhase2() might be at risk as well. Because of that and the generalized hazard presented by this bug, back-patch to all the supported branches.

Along the way, fix pg_prepared_statement() and pg_cursor() to not assume that the hashtables they are examining will stay static between calls. This is risky regardless of the newly noted dynahash problem, because hash_seq_search() has never promised to cope with deletion of table entries other than the just-returned one. There may be no bug here because the only supported way to call these functions is via ExecMakeTableFunctionResult() which will cycle them to completion before doing anything very interesting, but it seems best to get rid of the assumption. This affects 8.2 and HEAD only, since those functions weren't there earlier.
Diffstat (limited to 'src/include')
-rw-r--r--  src/include/nodes/execnodes.h  16
-rw-r--r--  src/include/utils/hsearch.h     8
2 files changed, 21 insertions, 3 deletions
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 2c53f12ce5a..f873f93cc52 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.139.2.3 2005/11/28 23:46:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.139.2.4 2007/04/26 23:25:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -382,8 +382,20 @@ typedef struct TupleHashTableData
typedef HASH_SEQ_STATUS TupleHashIterator;
-#define ResetTupleHashIterator(htable, iter) \
+/*
+ * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
+ * Use ResetTupleHashIterator if the table can be frozen (in this case no
+ * explicit scan termination is needed).
+ */
+#define InitTupleHashIterator(htable, iter) \
hash_seq_init(iter, (htable)->hashtab)
+#define TermTupleHashIterator(iter) \
+ hash_seq_term(iter)
+#define ResetTupleHashIterator(htable, iter) \
+ do { \
+ hash_freeze((htable)->hashtab); \
+ hash_seq_init(iter, (htable)->hashtab); \
+ } while (0)
#define ScanTupleHashTable(iter) \
((TupleHashEntry) hash_seq_search(iter))
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index 24b43e892c6..cdc70a77d82 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.41.2.1 2006/06/25 18:29:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.41.2.2 2007/04/26 23:25:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -119,6 +119,8 @@ typedef struct HTAB
MemoryContext hcxt; /* memory context if default allocator used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
+ /* freezing a shared table isn't allowed, so we can keep state here */
+ bool frozen; /* true = no more inserts allowed */
} HTAB;
/* Parameter data structure for hash_create */
@@ -185,8 +187,12 @@ extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
bool *foundPtr);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
+extern void hash_seq_term(HASH_SEQ_STATUS *status);
+extern void hash_freeze(HTAB *hashp);
extern Size hash_estimate_size(long num_entries, Size entrysize);
extern long hash_select_dirsize(long num_entries);
+extern void AtEOXact_HashTables(bool isCommit);
+extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);
/*
* prototypes for functions in hashfn.c