-rw-r--r--  src/backend/access/transam/xact.c  |   7
-rw-r--r--  src/backend/executor/nodeSubplan.c |   8
-rw-r--r--  src/backend/nodes/tidbitmap.c      |   8
-rw-r--r--  src/backend/storage/smgr/md.c      |   3
-rw-r--r--  src/backend/utils/hash/dynahash.c  | 201
-rw-r--r--  src/backend/utils/mmgr/portalmem.c |   6
-rw-r--r--  src/include/nodes/execnodes.h      |  16
-rw-r--r--  src/include/utils/hsearch.h        |   8
8 files changed, 242 insertions(+), 15 deletions(-)
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index a1bac34e168..70dae4ab961 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.215.2.1 2005/11/22 18:23:05 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.215.2.2 2007/04/26 23:25:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1578,6 +1578,7 @@ CommitTransaction(void)
AtEOXact_Namespace(true);
/* smgrcommit already done */
AtEOXact_Files();
+ AtEOXact_HashTables(true);
pgstat_count_xact_commit();
CurrentResourceOwner = NULL;
@@ -1792,6 +1793,7 @@ PrepareTransaction(void)
AtEOXact_Namespace(true);
/* smgrcommit already done */
AtEOXact_Files();
+ AtEOXact_HashTables(true);
CurrentResourceOwner = NULL;
ResourceOwnerDelete(TopTransactionResourceOwner);
@@ -1940,6 +1942,7 @@ AbortTransaction(void)
AtEOXact_Namespace(false);
smgrabort();
AtEOXact_Files();
+ AtEOXact_HashTables(false);
pgstat_count_xact_rollback();
/*
@@ -3651,6 +3654,7 @@ CommitSubTransaction(void)
s->parent->subTransactionId);
AtEOSubXact_Files(true, s->subTransactionId,
s->parent->subTransactionId);
+ AtEOSubXact_HashTables(true, s->nestingLevel);
/*
* We need to restore the upper transaction's read-only state, in case the
@@ -3760,6 +3764,7 @@ AbortSubTransaction(void)
s->parent->subTransactionId);
AtEOSubXact_Files(false, s->subTransactionId,
s->parent->subTransactionId);
+ AtEOSubXact_HashTables(false, s->nestingLevel);
}
/*
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 6ee5e13a1dc..19e92e8f343 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.70.2.2 2007/02/02 00:07:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.70.2.3 2007/04/26 23:25:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -616,7 +616,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
TupleHashIterator hashiter;
TupleHashEntry entry;
- ResetTupleHashIterator(hashtable, &hashiter);
+ InitTupleHashIterator(hashtable, &hashiter);
while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
{
ExecStoreTuple(entry->firstTuple, hashtable->tableslot,
@@ -625,8 +625,12 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
numCols, keyColIdx,
hashtable->eqfunctions,
hashtable->tempcxt))
+ {
+ TermTupleHashIterator(&hashiter);
return true;
+ }
}
+ /* No TermTupleHashIterator call needed here */
return false;
}
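
The nodeSubplan.c hunk above, and the tidbitmap.c and md.c hunks below, all apply the same new rule: a hash_seq_search() scan that is abandoned before it returns NULL must be closed with hash_seq_term() (here via the TermTupleHashIterator wrapper). A minimal sketch of the pattern, with a hypothetical table, entry type, and predicate that are not part of the patch:

    /* illustrative only -- my_table, MyEntry and entry_matches() are hypothetical */
    MyEntry    *entry;
    HASH_SEQ_STATUS status;

    hash_seq_init(&status, my_table);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
    {
        if (entry_matches(entry))
        {
            /* abandoning the scan early: deregister it explicitly */
            hash_seq_term(&status);
            return entry;
        }
    }
    /* hash_seq_search() returned NULL, so it already did the end-of-scan cleanup */
    return NULL;
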
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index bcfc7d0920c..b58bfe0cc66 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -23,7 +23,7 @@
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.8 2005/10/15 02:49:19 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.8.2.1 2007/04/26 23:25:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -907,7 +907,11 @@ tbm_lossify(TIDBitmap *tbm)
tbm_mark_page_lossy(tbm, page->blockno);
if (tbm->nentries <= tbm->maxentries)
- return; /* we have done enough */
+ {
+ /* we have done enough */
+ hash_seq_term(&status);
+ break;
+ }
/*
* Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 38e431e8033..c25216a08e4 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.118.2.3 2007/04/12 17:11:07 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.118.2.4 2007/04/26 23:25:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -920,6 +920,7 @@ mdsync(void)
entry->tag.rnode.spcNode,
entry->tag.rnode.dbNode,
entry->tag.rnode.relNode)));
+ hash_seq_term(&hstat);
return false;
}
else
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index c5caf21ba77..2672ab1170a 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.65.2.2 2006/06/25 18:29:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.65.2.3 2007/04/26 23:25:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -44,6 +44,7 @@
#include "postgres.h"
+#include "access/xact.h"
#include "storage/shmem.h"
#include "utils/dynahash.h"
#include "utils/hsearch.h"
@@ -71,6 +72,9 @@ static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
static bool init_htab(HTAB *hashp, long nelem);
static void hash_corrupted(HTAB *hashp);
+static void register_seq_scan(HTAB *hashp);
+static void deregister_seq_scan(HTAB *hashp);
+static bool has_seq_scans(HTAB *hashp);
/*
@@ -216,6 +220,8 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
errmsg("out of memory")));
}
+ hashp->frozen = false;
+
hdefault(hashp);
hctl = hashp->hctl;
@@ -673,6 +679,10 @@ hash_search(HTAB *hashp,
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
+ /* disallow inserts if frozen */
+ if (hashp->frozen)
+ elog(ERROR, "cannot insert into a frozen hashtable");
+
/* get the next free element */
currBucket = hctl->freeList;
if (currBucket == NULL)
@@ -709,8 +719,12 @@ hash_search(HTAB *hashp,
/* caller is expected to fill the data field on return */
- /* Check if it is time to split the segment */
- if (++hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
+ /*
+ * Check if it is time to split a bucket. Can't split if table
+ * is the subject of any active hash_seq_search scans.
+ */
+ if (++hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
+ !has_seq_scans(hashp))
{
/*
* NOTE: failure to expand table is not a fatal error, it just
@@ -728,15 +742,25 @@ hash_search(HTAB *hashp,
}
/*
- * hash_seq_init/_search
+ * hash_seq_init/_search/_term
* Sequentially search through hash table and return
* all the elements one by one, return NULL when no more.
*
+ * hash_seq_term should be called if and only if the scan is abandoned before
+ * completion; if hash_seq_search returns NULL then it has already done the
+ * end-of-scan cleanup.
+ *
* NOTE: caller may delete the returned element before continuing the scan.
* However, deleting any other element while the scan is in progress is
* UNDEFINED (it might be the one that curIndex is pointing at!). Also,
* if elements are added to the table while the scan is in progress, it is
* unspecified whether they will be visited by the scan or not.
+ *
+ * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
+ * worry about hash_seq_term cleanup, if the hashtable is first locked against
+ * further insertions by calling hash_freeze. This is used by nodeAgg.c,
+ * wherein it is inconvenient to track whether a scan is still open, and
+ * there's no possibility of further insertions after readout has begun.
*/
void
hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
@@ -744,6 +768,8 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
status->hashp = hashp;
status->curBucket = 0;
status->curEntry = NULL;
+ if (!hashp->frozen)
+ register_seq_scan(hashp);
}
void *
@@ -778,7 +804,10 @@ hash_seq_search(HASH_SEQ_STATUS *status)
max_bucket = hctl->max_bucket;
if (curBucket > max_bucket)
+ {
+ hash_seq_term(status);
return NULL; /* search is done */
+ }
/*
* first find the right segment in the table directory.
@@ -800,6 +829,7 @@ hash_seq_search(HASH_SEQ_STATUS *status)
if (++curBucket > max_bucket)
{
status->curBucket = curBucket;
+ hash_seq_term(status);
return NULL; /* search is done */
}
if (++segment_ndx >= ssize)
@@ -818,6 +848,36 @@ hash_seq_search(HASH_SEQ_STATUS *status)
return (void *) ELEMENTKEY(curElem);
}
+void
+hash_seq_term(HASH_SEQ_STATUS *status)
+{
+ if (!status->hashp->frozen)
+ deregister_seq_scan(status->hashp);
+}
+
+/*
+ * hash_freeze
+ * Freeze a hashtable against future insertions (deletions are
+ * still allowed)
+ *
+ * The reason for doing this is that by preventing any more bucket splits,
+ * we no longer need to worry about registering hash_seq_search scans,
+ * and thus caller need not be careful about ensuring hash_seq_term gets
+ * called at the right times.
+ *
+ * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
+ * with active scans (since hash_seq_term would then do the wrong thing).
+ */
+void
+hash_freeze(HTAB *hashp)
+{
+ if (hashp->isshared)
+ elog(ERROR, "cannot freeze shared hashtable");
+ if (!hashp->frozen && has_seq_scans(hashp))
+ elog(ERROR, "cannot freeze hashtable with active scans");
+ hashp->frozen = true;
+}
+
/********************************* UTILITIES ************************/
@@ -1031,3 +1091,136 @@ my_log2(long num)
;
return i;
}
+
+
+/************************* SEQ SCAN TRACKING ************************/
+
+/*
+ * We track active hash_seq_search scans here. The need for this mechanism
+ * comes from the fact that a scan will get confused if a bucket split occurs
+ * while it's in progress: it might visit entries twice, or even miss some
+ * entirely (if it's partway through the same bucket that splits). Hence
+ * we want to inhibit bucket splits if there are any active scans on the
+ * table being inserted into. This is a fairly rare case in current usage,
+ * so just postponing the split until the next insertion seems sufficient.
+ *
+ * Given present usages of the function, only a few scans are likely to be
+ * open concurrently; so a finite-size stack of open scans seems sufficient,
+ * and we don't worry that linear search is too slow. Note that we do
+ * allow multiple scans of the same hashtable to be open concurrently.
+ *
+ * This mechanism can support concurrent scan and insertion in a shared
+ * hashtable if it's the same backend doing both. It would fail otherwise,
+ * but locking reasons seem to preclude any such scenario anyway, so we don't
+ * worry.
+ *
+ * This arrangement is reasonably robust if a transient hashtable is deleted
+ * without notifying us. The absolute worst case is we might inhibit splits
+ * in another table created later at exactly the same address. We will give
+ * a warning at transaction end for reference leaks, so any bugs leading to
+ * lack of notification should be easy to catch.
+ */
+
+#define MAX_SEQ_SCANS 100
+
+static HTAB *seq_scan_tables[MAX_SEQ_SCANS]; /* tables being scanned */
+static int seq_scan_level[MAX_SEQ_SCANS]; /* subtransaction nest level */
+static int num_seq_scans = 0;
+
+
+/* Register a table as having an active hash_seq_search scan */
+static void
+register_seq_scan(HTAB *hashp)
+{
+ if (num_seq_scans >= MAX_SEQ_SCANS)
+ elog(ERROR, "too many active hash_seq_search scans");
+ seq_scan_tables[num_seq_scans] = hashp;
+ seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
+ num_seq_scans++;
+}
+
+/* Deregister an active scan */
+static void
+deregister_seq_scan(HTAB *hashp)
+{
+ int i;
+
+ /* Search backward since it's most likely at the stack top */
+ for (i = num_seq_scans - 1; i >= 0; i--)
+ {
+ if (seq_scan_tables[i] == hashp)
+ {
+ seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+ seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+ num_seq_scans--;
+ return;
+ }
+ }
+ elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
+ hashp->tabname);
+}
+
+/* Check if a table has any active scan */
+static bool
+has_seq_scans(HTAB *hashp)
+{
+ int i;
+
+ for (i = 0; i < num_seq_scans; i++)
+ {
+ if (seq_scan_tables[i] == hashp)
+ return true;
+ }
+ return false;
+}
+
+/* Clean up any open scans at end of transaction */
+void
+AtEOXact_HashTables(bool isCommit)
+{
+ /*
+ * During abort cleanup, open scans are expected; just silently clean 'em
+ * out. An open scan at commit means someone forgot a hash_seq_term()
+ * call, so complain.
+ *
+ * Note: it's tempting to try to print the tabname here, but refrain for
+ * fear of touching deallocated memory. This isn't a user-facing message
+ * anyway, so it needn't be pretty.
+ */
+ if (isCommit)
+ {
+ int i;
+
+ for (i = 0; i < num_seq_scans; i++)
+ {
+ elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+ seq_scan_tables[i]);
+ }
+ }
+ num_seq_scans = 0;
+}
+
+/* Clean up any open scans at end of subtransaction */
+void
+AtEOSubXact_HashTables(bool isCommit, int nestDepth)
+{
+ int i;
+
+ /*
+ * Search backward to make cleanup easy. Note we must check all entries,
+ * not only those at the end of the array, because deletion technique
+ * doesn't keep them in order.
+ */
+ for (i = num_seq_scans - 1; i >= 0; i--)
+ {
+ if (seq_scan_level[i] >= nestDepth)
+ {
+ if (isCommit)
+ elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+ seq_scan_tables[i]);
+ seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+ seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+ num_seq_scans--;
+ }
+ }
+}
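
Taking the new dynahash API as a whole, the division of labor is: an unfrozen table registers every open scan and postpones bucket splits while one is active, whereas a table frozen with hash_freeze() can be read out with no termination bookkeeping at all, because further inserts (and hence splits) are locked out. A hedged sketch of the frozen-readout style (my_table, MyEntry, emit() and done_early() are hypothetical, not from the patch):

    MyEntry    *entry;
    HASH_SEQ_STATUS status;

    /* all insertions are finished; lock out further inserts and bucket splits */
    hash_freeze(my_table);

    /* on a frozen table the scan is not registered ... */
    hash_seq_init(&status, my_table);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
    {
        emit(entry);
        if (done_early())
            break;      /* ... so leaving early needs no hash_seq_term() */
    }

Deletions remain legal in either mode, and hash_freeze() is rejected for shared hashtables and for tables that already have active scans, per the checks added above.
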
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 8120720041b..5f599699665 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.82.2.1 2005/11/22 18:23:25 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.82.2.2 2007/04/26 23:25:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -548,7 +548,9 @@ AtCommit_Portals(void)
/* Zap all non-holdable portals */
PortalDrop(portal, true);
- /* Restart the iteration */
+ /* Restart the iteration in case that led to other drops */
+ /* XXX is this really necessary? */
+ hash_seq_term(&status);
hash_seq_init(&status, PortalHashTable);
}
}
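
The portalmem.c change shows a third shape: when the loop body can delete arbitrary entries (PortalDrop may lead to other drops), the open scan is no longer trustworthy, so it is terminated and a fresh one started. A sketch of that restart pattern with hypothetical names (my_table, MyEntry, needs_drop(), drop_entry()):

    MyEntry    *entry;
    HASH_SEQ_STATUS status;

    hash_seq_init(&status, my_table);
    while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
    {
        if (needs_drop(entry))
        {
            drop_entry(entry);                /* may remove other entries too */
            hash_seq_term(&status);           /* close the now-unsafe scan */
            hash_seq_init(&status, my_table); /* and restart from the top */
        }
    }
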
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 2c53f12ce5a..f873f93cc52 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.139.2.3 2005/11/28 23:46:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.139.2.4 2007/04/26 23:25:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -382,8 +382,20 @@ typedef struct TupleHashTableData
typedef HASH_SEQ_STATUS TupleHashIterator;
-#define ResetTupleHashIterator(htable, iter) \
+/*
+ * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
+ * Use ResetTupleHashIterator if the table can be frozen (in this case no
+ * explicit scan termination is needed).
+ */
+#define InitTupleHashIterator(htable, iter) \
hash_seq_init(iter, (htable)->hashtab)
+#define TermTupleHashIterator(iter) \
+ hash_seq_term(iter)
+#define ResetTupleHashIterator(htable, iter) \
+ do { \
+ hash_freeze((htable)->hashtab); \
+ hash_seq_init(iter, (htable)->hashtab); \
+ } while (0)
#define ScanTupleHashTable(iter) \
((TupleHashEntry) hash_seq_search(iter))
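
The comment above distinguishes two iterator styles; a sketch of both, assuming a hypothetical TupleHashTable named hashtable and illustrative helpers found_what_we_want() and consume():

    TupleHashIterator iter;
    TupleHashEntry entry;

    /* Style 1: a scan that may stop early must be terminated explicitly */
    InitTupleHashIterator(hashtable, &iter);
    while ((entry = ScanTupleHashTable(&iter)) != NULL)
    {
        if (found_what_we_want(entry))
        {
            TermTupleHashIterator(&iter);
            break;
        }
    }

    /* Style 2: freeze-and-read-out; no termination call is ever needed */
    ResetTupleHashIterator(hashtable, &iter);
    while ((entry = ScanTupleHashTable(&iter)) != NULL)
        consume(entry);

The two styles are alternatives: nodeSubplan.c's findPartialMatch() uses the first, while full read-out code such as nodeAgg.c can rely on the second.
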
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index 24b43e892c6..cdc70a77d82 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.41.2.1 2006/06/25 18:29:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.41.2.2 2007/04/26 23:25:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -119,6 +119,8 @@ typedef struct HTAB
MemoryContext hcxt; /* memory context if default allocator used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
+ /* freezing a shared table isn't allowed, so we can keep state here */
+ bool frozen; /* true = no more inserts allowed */
} HTAB;
/* Parameter data structure for hash_create */
@@ -185,8 +187,12 @@ extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
bool *foundPtr);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
+extern void hash_seq_term(HASH_SEQ_STATUS *status);
+extern void hash_freeze(HTAB *hashp);
extern Size hash_estimate_size(long num_entries, Size entrysize);
extern long hash_select_dirsize(long num_entries);
+extern void AtEOXact_HashTables(bool isCommit);
+extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);
/*
* prototypes for functions in hashfn.c