about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/transam/xact.c | 4
-rw-r--r-- src/backend/executor/nodeSubplan.c | 8
-rw-r--r-- src/backend/utils/hash/dynahash.c | 169
-rw-r--r-- src/include/nodes/execnodes.h | 16
-rw-r--r-- src/include/utils/hsearch.h | 7
5 files changed, 194 insertions, 10 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index ba48e727e40..c8a68b5fb3a 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156.2.3 2006/05/21 20:06:43 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156.2.4 2007/04/26 23:25:40 tgl Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
@@ -1011,6 +1011,7 @@ CommitTransaction(void)
AtEOXact_Namespace(true);
AtEOXact_CatCache(true);
AtEOXact_Files();
+ AtEOXact_HashTables(true);
pgstat_count_xact_commit();
AtCommit_Memory();
@@ -1126,6 +1127,7 @@ AbortTransaction(void)
AtEOXact_Namespace(false);
AtEOXact_CatCache(false);
AtEOXact_Files();
+ AtEOXact_HashTables(false);
SetReindexProcessing(InvalidOid, InvalidOid);
pgstat_count_xact_rollback();
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 971dd5879db..c93a9348a4b 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.58 2003/10/01 21:30:52 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.58.2.1 2007/04/26 23:25:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -627,7 +627,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
TupleHashIterator hashiter;
TupleHashEntry entry;
- ResetTupleHashIterator(hashtable, &hashiter);
+ InitTupleHashIterator(hashtable, &hashiter);
while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
{
if (!execTuplesUnequal(entry->firstTuple,
@@ -636,8 +636,12 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
numCols, keyColIdx,
hashtable->eqfunctions,
hashtable->tempcxt))
+ {
+ TermTupleHashIterator(&hashiter);
return true;
+ }
}
+ /* No TermTupleHashIterator call needed here */
return false;
}
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index ef92103b64a..ad9db513ac3 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48.2.1 2005/06/18 20:51:59 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48.2.2 2007/04/26 23:25:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -69,6 +69,9 @@ static bool expand_table(HTAB *hashp);
static bool hdefault(HTAB *hashp);
static bool init_htab(HTAB *hashp, long nelem);
static void hash_corrupted(HTAB *hashp);
+static void register_seq_scan(HTAB *hashp);
+static void deregister_seq_scan(HTAB *hashp);
+static bool has_seq_scans(HTAB *hashp);
/*
@@ -185,6 +188,8 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
return NULL;
}
+ hashp->frozen = false;
+
if (!hdefault(hashp))
return NULL;
@@ -633,6 +638,10 @@ hash_search(HTAB *hashp,
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
+ /* disallow inserts if frozen */
+ if (hashp->frozen)
+ elog(ERROR, "cannot insert into a frozen hashtable");
+
/* get the next free element */
currBucket = hctl->freeList;
if (currBucket == NULL)
@@ -656,8 +665,12 @@ hash_search(HTAB *hashp,
/* caller is expected to fill the data field on return */
- /* Check if it is time to split the segment */
- if (++hctl->nentries / (long) (hctl->max_bucket + 1) > hctl->ffactor)
+ /*
+ * Check if it is time to split a bucket. Can't split if table
+ * is the subject of any active hash_seq_search scans.
+ */
+ if (++hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
+ !has_seq_scans(hashp))
{
/*
* NOTE: failure to expand table is not a fatal error, it
@@ -676,15 +689,25 @@ hash_search(HTAB *hashp,
}
/*
- * hash_seq_init/_search
+ * hash_seq_init/_search/_term
* Sequentially search through hash table and return
* all the elements one by one, return NULL when no more.
*
+ * hash_seq_term should be called if and only if the scan is abandoned before
+ * completion; if hash_seq_search returns NULL then it has already done the
+ * end-of-scan cleanup.
+ *
* NOTE: caller may delete the returned element before continuing the scan.
* However, deleting any other element while the scan is in progress is
* UNDEFINED (it might be the one that curIndex is pointing at!). Also,
* if elements are added to the table while the scan is in progress, it is
* unspecified whether they will be visited by the scan or not.
+ *
+ * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
+ * worry about hash_seq_term cleanup, if the hashtable is first locked against
+ * further insertions by calling hash_freeze. This is used by nodeAgg.c,
+ * wherein it is inconvenient to track whether a scan is still open, and
+ * there's no possibility of further insertions after readout has begun.
*/
void
hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
@@ -692,6 +715,8 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
status->hashp = hashp;
status->curBucket = 0;
status->curEntry = NULL;
+ if (!hashp->frozen)
+ register_seq_scan(hashp);
}
void *
@@ -745,9 +770,40 @@ hash_seq_search(HASH_SEQ_STATUS *status)
++status->curBucket;
}
+ hash_seq_term(status);
return NULL; /* out of buckets */
}
+void
+hash_seq_term(HASH_SEQ_STATUS *status)
+{
+	HTAB	   *scanned = status->hashp;
+
+	/*
+	 * hash_seq_init registers a scan only when the table is not frozen, so
+	 * for a frozen table there is no registration to undo.
+	 */
+	if (scanned->frozen)
+		return;
+
+	deregister_seq_scan(scanned);
+}
+
+/*
+ * hash_freeze
+ *		Lock a hashtable against any further insertions; entries may
+ *		still be deleted afterwards.
+ *
+ * Once a table is frozen it can never undergo another bucket split, so
+ * hash_seq_search scans on it need not be registered, and callers are
+ * relieved of calling hash_seq_term at exactly the right moments.
+ *
+ * Freezing an already-frozen table is a no-op.  A table that currently has
+ * registered scans cannot be frozen, because hash_seq_term would later skip
+ * deregistering them.  Shared tables cannot be frozen at all.
+ */
+void
+hash_freeze(HTAB *hashp)
+{
+	if (hashp->isshared)
+		elog(ERROR, "cannot freeze shared hashtable");
+
+	/* repeated freezes are allowed and change nothing */
+	if (hashp->frozen)
+		return;
+
+	if (has_seq_scans(hashp))
+		elog(ERROR, "cannot freeze hashtable with active scans");
+
+	hashp->frozen = true;
+}
+
+
/********************************* UTILITIES ************************/
@@ -958,3 +1014,108 @@ my_log2(long num)
;
return i;
}
+
+
+/************************* SEQ SCAN TRACKING ************************/
+
+/*
+ * We track active hash_seq_search scans here. The need for this mechanism
+ * comes from the fact that a scan will get confused if a bucket split occurs
+ * while it's in progress: it might visit entries twice, or even miss some
+ * entirely (if it's partway through the same bucket that splits). Hence
+ * we want to inhibit bucket splits if there are any active scans on the
+ * table being inserted into. This is a fairly rare case in current usage,
+ * so just postponing the split until the next insertion seems sufficient.
+ *
+ * Given present usages of the function, only a few scans are likely to be
+ * open concurrently; so a finite-size stack of open scans seems sufficient,
+ * and we don't worry that linear search is too slow. Note that we do
+ * allow multiple scans of the same hashtable to be open concurrently.
+ *
+ * This mechanism can support concurrent scan and insertion in a shared
+ * hashtable if it's the same backend doing both. It would fail otherwise,
+ * but locking reasons seem to preclude any such scenario anyway, so we don't
+ * worry.
+ *
+ * This arrangement is reasonably robust if a transient hashtable is deleted
+ * without notifying us. The absolute worst case is we might inhibit splits
+ * in another table created later at exactly the same address. We will give
+ * a warning at transaction end for reference leaks, so any bugs leading to
+ * lack of notification should be easy to catch.
+ */
+
+#define MAX_SEQ_SCANS 100 /* capacity of seq_scan_tables; exceeding it is an ERROR */
+
+static HTAB *seq_scan_tables[MAX_SEQ_SCANS]; /* tables being scanned */
+static int num_seq_scans = 0; /* count of valid entries in seq_scan_tables */
+
+
+/* Note one more active hash_seq_search scan on the given table */
+static void
+register_seq_scan(HTAB *hashp)
+{
+	/* the registry is a fixed-size array; refuse to run off its end */
+	if (num_seq_scans >= MAX_SEQ_SCANS)
+		elog(ERROR, "too many active hash_seq_search scans");
+
+	seq_scan_tables[num_seq_scans++] = hashp;
+}
+
+/* Forget one active scan of the given table; ERROR if none is registered */
+static void
+deregister_seq_scan(HTAB *hashp)
+{
+	int			pos = num_seq_scans;
+
+	/* The entry is most likely near the stack top, so look there first */
+	while (--pos >= 0)
+	{
+		if (seq_scan_tables[pos] != hashp)
+			continue;
+
+		/* plug the hole with the topmost entry, then shrink the stack */
+		num_seq_scans--;
+		seq_scan_tables[pos] = seq_scan_tables[num_seq_scans];
+		return;
+	}
+
+	elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
+		 hashp->tabname);
+}
+
+/* Report whether at least one registered scan targets the given table */
+static bool
+has_seq_scans(HTAB *hashp)
+{
+	HTAB	  **cur = seq_scan_tables;
+	HTAB	  **stop = seq_scan_tables + num_seq_scans;
+
+	while (cur < stop)
+	{
+		if (*cur == hashp)
+			return true;
+		cur++;
+	}
+	return false;
+}
+
+/* End-of-transaction hook: discard every registered scan */
+void
+AtEOXact_HashTables(bool isCommit)
+{
+	int			i;
+
+	/*
+	 * Scans left open by an abort are normal and are dropped silently.  One
+	 * still open at commit means a hash_seq_term() call was forgotten, so
+	 * warn about each such entry.
+	 *
+	 * Only the table's address is reported: printing tabname would mean
+	 * dereferencing memory that may already have been freed, and this
+	 * message is not user-facing anyway.
+	 */
+	for (i = 0; isCommit && i < num_seq_scans; i++)
+		elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+			 seq_scan_tables[i]);
+
+	num_seq_scans = 0;
+}
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 263681dc3e2..71a09aa59ae 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: execnodes.h,v 1.107.2.2 2004/01/22 02:23:35 tgl Exp $
+ * $Id: execnodes.h,v 1.107.2.3 2007/04/26 23:25:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -365,8 +365,20 @@ typedef struct TupleHashTableData
typedef HASH_SEQ_STATUS TupleHashIterator;
-#define ResetTupleHashIterator(htable, iter) \
+/*
+ * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
+ * TermTupleHashIterator maps to hash_seq_term, whose contract (see
+ * dynahash.c) is that it is called only when a scan is abandoned before
+ * ScanTupleHashTable has returned NULL.
+ *
+ * Use ResetTupleHashIterator if the table can be frozen (in this case no
+ * explicit scan termination is needed).  Note that it calls hash_freeze,
+ * after which further insertions into the table raise an error.
+ */
+#define InitTupleHashIterator(htable, iter) \
hash_seq_init(iter, (htable)->hashtab)
+#define TermTupleHashIterator(iter) \
+ hash_seq_term(iter)
+#define ResetTupleHashIterator(htable, iter) \
+ do { \
+ hash_freeze((htable)->hashtab); \
+ hash_seq_init(iter, (htable)->hashtab); \
+ } while (0)
#define ScanTupleHashTable(iter) \
((TupleHashEntry) hash_seq_search(iter))
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index d6f43163811..7a969005372 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: hsearch.h,v 1.29.2.1 2005/06/18 20:51:59 tgl Exp $
+ * $Id: hsearch.h,v 1.29.2.2 2007/04/26 23:25:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -109,6 +109,8 @@ typedef struct HTAB
* used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
+ /* freezing a shared table isn't allowed, so we can keep state here */
+ bool frozen; /* true = no more inserts allowed */
HashCopyFunc keycopy; /* key copying function */
} HTAB;
@@ -178,8 +180,11 @@ extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
bool *foundPtr);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
+extern void hash_seq_term(HASH_SEQ_STATUS *status);
+extern void hash_freeze(HTAB *hashp);
extern long hash_estimate_size(long num_entries, Size entrysize);
extern long hash_select_dirsize(long num_entries);
+extern void AtEOXact_HashTables(bool isCommit);
/*
* prototypes for functions in hashfn.c