about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-03-29 00:15:39 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-03-29 00:15:39 +0000
commit: fba8113c1b74b9508cf2e6b7a18b0fb3637d9ba0 (patch)
tree: b70081c09aa6f06b442f4f43313e738a693de7ea /src
parent: 4591fb1aa8c0f8c2d724c2a83e1a336650cca933 (diff)
download: postgresql-fba8113c1b74b9508cf2e6b7a18b0fb3637d9ba0.tar.gz
download: postgresql-fba8113c1b74b9508cf2e6b7a18b0fb3637d9ba0.zip
Teach CLUSTER to skip writing WAL if not needed (i.e., not using archiving) --- Simon.
Also, code review and cleanup for the previous COPY-no-WAL patches --- Tom.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/heap/heapam.c 75
-rw-r--r-- src/backend/access/heap/tuptoaster.c 22
-rw-r--r-- src/backend/catalog/index.c 9
-rw-r--r-- src/backend/commands/cluster.c 20
-rw-r--r-- src/backend/commands/copy.c 112
-rw-r--r-- src/backend/executor/execMain.c 12
-rw-r--r-- src/backend/utils/cache/relcache.c 75
-rw-r--r-- src/include/access/heapam.h 5
-rw-r--r-- src/include/access/tuptoaster.h 5
-rw-r--r-- src/include/utils/rel.h 8
-rw-r--r-- src/include/utils/relcache.h 6
11 files changed, 197 insertions, 152 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 367831a515a..a99aa4ced0a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.229 2007/03/25 19:45:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.230 2007/03/29 00:15:37 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -1360,11 +1360,14 @@ heap_get_latest_tid(Relation relation,
* that all new tuples go into new pages not containing any tuples from other
* transactions, that the relation gets fsync'd before commit, and that the
* transaction emits at least one WAL record to ensure RecordTransactionCommit
- * will decide to WAL-log the commit. (see heap_sync() comments also)
+ * will decide to WAL-log the commit. (See also heap_sync() comments)
*
* use_fsm is passed directly to RelationGetBufferForTuple, which see for
* more info.
*
+ * Note that use_wal and use_fsm will be applied when inserting into the
+ * heap's TOAST table, too, if the tuple requires any out-of-line data.
+ *
* The return value is the OID assigned to the tuple (either here or by the
* caller), or InvalidOid if no OID. The header fields of *tup are updated
* to match the stored tuple; in particular tup->t_self receives the actual
@@ -1418,7 +1421,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
* into the relation; tup is the caller's original untoasted data.
*/
if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
- heaptup = toast_insert_or_update(relation, tup, NULL, use_wal);
+ heaptup = toast_insert_or_update(relation, tup, NULL,
+ use_wal, use_fsm);
else
heaptup = tup;
@@ -1526,8 +1530,10 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
* simple_heap_insert - insert a tuple
*
* Currently, this routine differs from heap_insert only in supplying
- * a default command ID. But it should be used rather than using
- * heap_insert directly in most places where we are modifying system catalogs.
+ * a default command ID and not allowing access to the speedup options.
+ *
+ * This should be used rather than using heap_insert directly in most places
+ * where we are modifying system catalogs.
*/
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
@@ -1536,18 +1542,6 @@ simple_heap_insert(Relation relation, HeapTuple tup)
}
/*
- * fast_heap_insert - insert a tuple with options to improve speed
- *
- * Currently, this routine allows specifying additional options for speed
- * in certain cases, such as WAL-avoiding COPY command
- */
-Oid
-fast_heap_insert(Relation relation, HeapTuple tup, bool use_wal)
-{
- return heap_insert(relation, tup, GetCurrentCommandId(), use_wal, use_wal);
-}
-
-/*
* heap_delete - delete a tuple
*
* NB: do not call this directly unless you are prepared to deal with
@@ -2112,7 +2106,9 @@ l2:
*/
if (need_toast)
{
- heaptup = toast_insert_or_update(relation, newtup, &oldtup, true);
+ /* Note we always use WAL and FSM during updates */
+ heaptup = toast_insert_or_update(relation, newtup, &oldtup,
+ true, true);
newtupsize = MAXALIGN(heaptup->t_len);
}
else
@@ -3988,23 +3984,40 @@ heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "UNKNOWN");
}
-/* ----------------
- * heap_sync - sync a heap, for use when no WAL has been written
- *
- * ----------------
+/*
+ * heap_sync - sync a heap, for use when no WAL has been written
+ *
+ * This forces the heap contents (including TOAST heap if any) down to disk.
+ * If we skipped using WAL, and it's not a temp relation, we must force the
+ * relation down to disk before it's safe to commit the transaction. This
+ * requires writing out any dirty buffers and then doing a forced fsync.
+ *
+ * Indexes are not touched. (Currently, index operations associated with
+ * the commands that use this are WAL-logged and so do not need fsync.
+ * That behavior might change someday, but in any case it's likely that
+ * any fsync decisions required would be per-index and hence not appropriate
+ * to be done here.)
*/
void
heap_sync(Relation rel)
{
- if (!rel->rd_istemp)
+ /* temp tables never need fsync */
+ if (rel->rd_istemp)
+ return;
+
+ /* main heap */
+ FlushRelationBuffers(rel);
+ /* FlushRelationBuffers will have opened rd_smgr */
+ smgrimmedsync(rel->rd_smgr);
+
+ /* toast heap, if any */
+ if (OidIsValid(rel->rd_rel->reltoastrelid))
{
- /*
- * If we skipped using WAL, and it's not a temp relation,
- * we must force the relation down to disk before it's
- * safe to commit the transaction. This requires forcing
- * out any dirty buffers and then doing a forced fsync.
- */
- FlushRelationBuffers(rel);
- smgrimmedsync(rel->rd_smgr);
+ Relation toastrel;
+
+ toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock);
+ FlushRelationBuffers(toastrel);
+ smgrimmedsync(toastrel->rd_smgr);
+ heap_close(toastrel, AccessShareLock);
}
}
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index b1eb8aea4d3..b1e02e13755 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.71 2007/02/27 23:48:07 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.72 2007/03/29 00:15:37 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -33,6 +33,7 @@
#include "access/genam.h"
#include "access/heapam.h"
#include "access/tuptoaster.h"
+#include "access/xact.h"
#include "catalog/catalog.h"
#include "utils/fmgroids.h"
#include "utils/pg_lzcompress.h"
@@ -42,7 +43,8 @@
#undef TOAST_DEBUG
static void toast_delete_datum(Relation rel, Datum value);
-static Datum toast_save_datum(Relation rel, Datum value, bool use_wal);
+static Datum toast_save_datum(Relation rel, Datum value,
+ bool use_wal, bool use_fsm);
static varattrib *toast_fetch_datum(varattrib *attr);
static varattrib *toast_fetch_datum_slice(varattrib *attr,
int32 sliceoffset, int32 length);
@@ -333,6 +335,7 @@ toast_delete(Relation rel, HeapTuple oldtup)
* Inputs:
* newtup: the candidate new tuple to be inserted
* oldtup: the old row version for UPDATE, or NULL for INSERT
+ * use_wal, use_fsm: flags to be passed to heap_insert() for toast rows
* Result:
* either newtup if no toasting is needed, or a palloc'd modified tuple
* that is what should actually get stored
@@ -342,7 +345,8 @@ toast_delete(Relation rel, HeapTuple oldtup)
* ----------
*/
HeapTuple
-toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, bool use_wal)
+toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
+ bool use_wal, bool use_fsm)
{
HeapTuple result_tuple;
TupleDesc tupleDesc;
@@ -618,7 +622,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, bool us
i = biggest_attno;
old_value = toast_values[i];
toast_action[i] = 'p';
- toast_values[i] = toast_save_datum(rel, toast_values[i], use_wal);
+ toast_values[i] = toast_save_datum(rel, toast_values[i],
+ use_wal, use_fsm);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
@@ -729,7 +734,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, bool us
i = biggest_attno;
old_value = toast_values[i];
toast_action[i] = 'p';
- toast_values[i] = toast_save_datum(rel, toast_values[i], use_wal);
+ toast_values[i] = toast_save_datum(rel, toast_values[i],
+ use_wal, use_fsm);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
@@ -977,7 +983,8 @@ toast_compress_datum(Datum value)
* ----------
*/
static Datum
-toast_save_datum(Relation rel, Datum value, bool use_wal)
+toast_save_datum(Relation rel, Datum value,
+ bool use_wal, bool use_fsm)
{
Relation toastrel;
Relation toastidx;
@@ -985,6 +992,7 @@ toast_save_datum(Relation rel, Datum value, bool use_wal)
TupleDesc toasttupDesc;
Datum t_values[3];
bool t_isnull[3];
+ CommandId mycid = GetCurrentCommandId();
varattrib *result;
struct
{
@@ -1063,7 +1071,7 @@ toast_save_datum(Relation rel, Datum value, bool use_wal)
if (!HeapTupleIsValid(toasttup))
elog(ERROR, "failed to build TOAST tuple");
- fast_heap_insert(toastrel, toasttup, use_wal);
+ heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm);
/*
* Create the index entry. We cheat a little here by not using
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index b660a94aabc..377bc9f4f2c 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.281 2007/03/25 19:45:14 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.282 2007/03/29 00:15:37 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -1248,12 +1248,11 @@ setNewRelfilenode(Relation relation)
heap_close(pg_class, RowExclusiveLock);
- /* Remember we did this in current transaction, to allow later optimisations */
- relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
- RelationCacheResetAtEOXact();
-
/* Make sure the relfilenode change is visible */
CommandCounterIncrement();
+
+ /* Mark the rel as having a new relfilenode in current transaction */
+ RelationCacheMarkNewRelfilenode(relation);
}
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index aa911369409..ac771b77a60 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.157 2007/03/13 00:33:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.158 2007/03/29 00:15:37 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -653,6 +653,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
char *nulls;
IndexScanDesc scan;
HeapTuple tuple;
+ CommandId mycid = GetCurrentCommandId();
+ bool use_wal;
/*
* Open the relations we need.
@@ -676,6 +678,17 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
memset(nulls, 'n', natts * sizeof(char));
/*
+ * We need to log the copied data in WAL iff WAL archiving is enabled AND
+ * it's not a temp rel. (Since we know the target relation is new and
+ * can't have any FSM data, we can always tell heap_insert to ignore FSM,
+ * even when using WAL.)
+ */
+ use_wal = XLogArchivingActive() && !NewHeap->rd_istemp;
+
+ /* use_wal off requires rd_targblock be initially invalid */
+ Assert(NewHeap->rd_targblock == InvalidBlockNumber);
+
+ /*
* Scan through the OldHeap on the OldIndex and copy each tuple into the
* NewHeap.
*/
@@ -722,7 +735,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
if (NewHeap->rd_rel->relhasoids)
HeapTupleSetOid(copiedTuple, HeapTupleGetOid(tuple));
- simple_heap_insert(NewHeap, copiedTuple);
+ heap_insert(NewHeap, copiedTuple, mycid, use_wal, false);
heap_freetuple(copiedTuple);
@@ -734,6 +747,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
pfree(values);
pfree(nulls);
+ if (!use_wal)
+ heap_sync(NewHeap);
+
index_close(OldIndex, NoLock);
heap_close(OldHeap, NoLock);
heap_close(NewHeap, NoLock);
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index a2e1939ea25..99d347f5907 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.278 2007/03/13 00:33:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.279 2007/03/29 00:15:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1125,11 +1125,10 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
cstate->copy_dest = COPY_FILE; /* default */
cstate->filename = stmt->filename;
- if (is_from) /* copy from file to database */
- CopyFrom(cstate);
+ if (is_from)
+ CopyFrom(cstate); /* copy from file to database */
else
- /* copy from database to file */
- DoCopyTo(cstate);
+ DoCopyTo(cstate); /* copy from database to file */
/*
* Close the relation or query. If reading, we can release the
@@ -1640,7 +1639,9 @@ CopyFrom(CopyState cstate)
ExprContext *econtext; /* used for ExecEvalExpr for default atts */
MemoryContext oldcontext = CurrentMemoryContext;
ErrorContextCallback errcontext;
- bool use_wal = true; /* By default, we use WAL to log db changes */
+ CommandId mycid = GetCurrentCommandId();
+ bool use_wal = true; /* by default, use WAL logging */
+ bool use_fsm = true; /* by default, use FSM for free space */
Assert(cstate->rel);
@@ -1663,6 +1664,48 @@ CopyFrom(CopyState cstate)
RelationGetRelationName(cstate->rel))));
}
+ /*----------
+ * Check to see if we can avoid writing WAL
+ *
+ * If archive logging is not enabled *and* either
+ * - table was created in same transaction as this COPY
+ * - data is being written to relfilenode created in this transaction
+ * then we can skip writing WAL. It's safe because if the transaction
+ * doesn't commit, we'll discard the table (or the new relfilenode file).
+ * If it does commit, we'll have done the heap_sync at the bottom of this
+ * routine first.
+ *
+ * As mentioned in comments in utils/rel.h, the in-same-transaction test
+ * is not completely reliable, since in rare cases rd_createSubid or
+ * rd_newRelfilenodeSubid can be cleared before the end of the transaction.
+ * However this is OK since at worst we will fail to make the optimization.
+ *
+ * When skipping WAL it's entirely possible that COPY itself will write no
+ * WAL records at all. This is of concern because RecordTransactionCommit
+ * might decide it doesn't need to log our eventual commit, which we
+ * certainly need it to do. However, we need no special action here for
+ * that, because if we have a new table or new relfilenode then there
+ * must have been a WAL-logged pg_class update earlier in the transaction.
+ *
+ * Also, if the target file is new-in-transaction, we assume that checking
+ * FSM for free space is a waste of time, even if we must use WAL because
+ * of archiving. This could possibly be wrong, but it's unlikely.
+ *
+ * The comments for heap_insert and RelationGetBufferForTuple specify that
+ * skipping WAL logging is only safe if we ensure that our tuples do not
+ * go into pages containing tuples from any other transactions --- but this
+ * must be the case if we have a new table or new relfilenode, so we need
+ * no additional work to enforce that.
+ *----------
+ */
+ if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
+ cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ {
+ use_fsm = false;
+ if (!XLogArchivingActive())
+ use_wal = false;
+ }
+
if (pipe)
{
if (whereToSendOutput == DestRemote)
@@ -1832,28 +1875,6 @@ CopyFrom(CopyState cstate)
nfields = file_has_oids ? (attr_count + 1) : attr_count;
field_strings = (char **) palloc(nfields * sizeof(char *));
- /*
- * Check for performance optimization by avoiding WAL writes
- *
- * If archive logging is not be enabled *and* either
- * - table is created in same transaction as this COPY
- * - table data is now being written to new relfilenode
- * then we can safely avoid writing WAL. Why?
- * The data files for the table plus toast table/index, plus any indexes
- * will all be dropped at the end of the transaction if it fails, so we
- * do not need to worry about inconsistent states.
- * As mentioned in comments in utils/rel.h, the in-same-transaction test is
- * not completely reliable, since rd_createSubId can be reset to zero in
- * certain cases before the end of the creating transaction.
- * We are doing this for performance only, so we only need to know:
- * if rd_createSubid != InvalidSubTransactionId then it is *always* just
- * created. If we have PITR enabled, then we *must* use_wal
- */
- if ((cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
- && !XLogArchivingActive())
- use_wal = false;
-
/* Initialize state variables */
cstate->fe_eof = false;
cstate->eol_type = EOL_UNKNOWN;
@@ -2087,7 +2108,7 @@ CopyFrom(CopyState cstate)
ExecConstraints(resultRelInfo, slot, estate);
/* OK, store the tuple and create index entries for it */
- fast_heap_insert(cstate->rel, tuple, use_wal);
+ heap_insert(cstate->rel, tuple, mycid, use_wal, use_fsm);
if (resultRelInfo->ri_NumIndices > 0)
ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
@@ -2104,32 +2125,6 @@ CopyFrom(CopyState cstate)
}
}
- /*
- * If we skipped writing WAL for heaps, then we need to sync
- */
- if (!use_wal)
- {
- /* main heap */
- heap_sync(cstate->rel);
-
- /* main heap indexes, if any */
- /* we always use WAL for index inserts, so no need to sync */
-
- /* toast heap, if any */
- if (OidIsValid(cstate->rel->rd_rel->reltoastrelid))
- {
- Relation toastrel;
-
- toastrel = heap_open(cstate->rel->rd_rel->reltoastrelid,
- AccessShareLock);
- heap_sync(toastrel);
- heap_close(toastrel, AccessShareLock);
- }
-
- /* toast index, if toast heap */
- /* we always use WAL for index inserts, so no need to sync */
- }
-
/* Done, clean up */
error_context_stack = errcontext.previous;
@@ -2164,6 +2159,13 @@ CopyFrom(CopyState cstate)
errmsg("could not read from file \"%s\": %m",
cstate->filename)));
}
+
+ /*
+ * If we skipped writing WAL, then we need to sync the heap (but not
+ * indexes since those use WAL anyway)
+ */
+ if (!use_wal)
+ heap_sync(cstate->rel);
}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index b2f7159e8c0..cacd7c6fe70 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -26,7 +26,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.291 2007/03/25 19:45:14 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.292 2007/03/29 00:15:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2541,14 +2541,8 @@ CloseIntoRel(QueryDesc *queryDesc)
/* OpenIntoRel might never have gotten called */
if (estate->es_into_relation_descriptor)
{
- /*
- * If we skipped using WAL, and it's not a temp relation, we must
- * force the relation down to disk before it's safe to commit the
- * transaction. This requires forcing out any dirty buffers and then
- * doing a forced fsync.
- */
- if (!estate->es_into_relation_use_wal &&
- !estate->es_into_relation_descriptor->rd_istemp)
+ /* If we skipped using WAL, must heap_sync before commit */
+ if (!estate->es_into_relation_use_wal)
heap_sync(estate->es_into_relation_descriptor);
/* close rel, but keep lock until commit */
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 91b7f146b43..d8bd36bc94f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.258 2007/03/19 23:38:29 wieck Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.259 2007/03/29 00:15:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1572,7 +1572,8 @@ RelationClose(Relation relation)
#ifdef RELCACHE_FORCE_RELEASE
if (RelationHasReferenceCountZero(relation) &&
- relation->rd_createSubid == InvalidSubTransactionId)
+ relation->rd_createSubid == InvalidSubTransactionId &&
+ relation->rd_newRelfilenodeSubid == InvalidSubTransactionId)
RelationClearRelation(relation, false);
#endif
}
@@ -1759,11 +1760,12 @@ RelationClearRelation(Relation relation, bool rebuild)
{
/*
* When rebuilding an open relcache entry, must preserve ref count and
- * rd_createSubid state. Also attempt to preserve the tupledesc and
- * rewrite-rule substructures in place. (Note: the refcount mechanism
- * for tupledescs may eventually ensure that we don't really need to
- * preserve the tupledesc in-place, but for now there are still a lot
- * of places that assume an open rel's tupledesc won't move.)
+ * rd_createSubid/rd_newRelfilenodeSubid state. Also attempt to
+ * preserve the tupledesc and rewrite-rule substructures in place.
+ * (Note: the refcount mechanism for tupledescs may eventually ensure
+ * that we don't really need to preserve the tupledesc in-place, but
+ * for now there are still a lot of places that assume an open rel's
+ * tupledesc won't move.)
*
* Note that this process does not touch CurrentResourceOwner; which
* is good because whatever ref counts the entry may have do not
@@ -1839,7 +1841,7 @@ RelationFlushRelation(Relation relation)
/*
* New relcache entries are always rebuilt, not flushed; else we'd
* forget the "new" status of the relation, which is a useful
- * optimization to have.
+ * optimization to have. Ditto for the new-relfilenode status.
*/
rebuild = true;
}
@@ -1916,6 +1918,8 @@ RelationCacheInvalidateEntry(Oid relationId)
* so we do not touch new-in-transaction relations; they cannot be targets
* of cross-backend SI updates (and our own updates now go through a
* separate linked list that isn't limited by the SI message buffer size).
+ * Likewise, we need not discard new-relfilenode-in-transaction hints,
+ * since any invalidation of those would be a local event.
*
* We do this in two phases: the first pass deletes deletable items, and
* the second one rebuilds the rebuildable items. This is essential for
@@ -1958,14 +1962,6 @@ RelationCacheInvalidate(void)
if (relation->rd_createSubid != InvalidSubTransactionId)
continue;
- /*
- * Reset newRelfilenode hint. It is never used for correctness, only
- * for performance optimization. An incorrectly set hint can lead
- * to data loss in some circumstances, so play safe.
- */
- if (relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
- relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
-
relcacheInvalsReceived++;
if (RelationHasReferenceCountZero(relation))
@@ -2018,17 +2014,6 @@ RelationCacheInvalidate(void)
}
/*
- * RelationCacheResetAtEOXact
- *
- * Register that work will be required at main-transaction commit or abort
- */
-void
-RelationCacheResetAtEOXact(void)
-{
- need_eoxact_work = true;
-}
-
-/*
* AtEOXact_RelationCache
*
* Clean up the relcache at main-transaction commit or abort.
@@ -2056,9 +2041,10 @@ AtEOXact_RelationCache(bool isCommit)
* the debug-only Assert checks, most transactions don't create any work
* for us to do here, so we keep a static flag that gets set if there is
* anything to do. (Currently, this means either a relation is created in
- * the current xact, or an index list is forced.) For simplicity, the
- * flag remains set till end of top-level transaction, even though we
- * could clear it at subtransaction end in some cases.
+ * the current xact, or one is given a new relfilenode, or an index list
+ * is forced.) For simplicity, the flag remains set till end of top-level
+ * transaction, even though we could clear it at subtransaction end in
+ * some cases.
*/
if (!need_eoxact_work
#ifdef USE_ASSERT_CHECKING
@@ -2111,6 +2097,10 @@ AtEOXact_RelationCache(bool isCommit)
continue;
}
}
+
+ /*
+ * Likewise, reset the hint about the relfilenode being new.
+ */
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
/*
@@ -2173,6 +2163,10 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
continue;
}
}
+
+ /*
+ * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
+ */
if (relation->rd_newRelfilenodeSubid == mySubid)
{
if (isCommit)
@@ -2195,6 +2189,23 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
}
/*
+ * RelationCacheMarkNewRelfilenode
+ *
+ * Mark the rel as having been given a new relfilenode in the current
+ * (sub) transaction. This is a hint that can be used to optimize
+ * later operations on the rel in the same transaction.
+ */
+void
+RelationCacheMarkNewRelfilenode(Relation rel)
+{
+ /* Mark it... */
+ rel->rd_newRelfilenodeSubid = GetCurrentSubTransactionId();
+ /* ... and now we have eoxact cleanup work to do */
+ need_eoxact_work = true;
+}
+
+
+/*
* RelationBuildLocalRelation
* Build a relcache entry for an about-to-be-created relation,
* and enter it into the relcache.
@@ -2272,7 +2283,7 @@ RelationBuildLocalRelation(const char *relname,
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
/* must flag that we have rels created in this transaction */
- RelationCacheResetAtEOXact();
+ need_eoxact_work = true;
/* is it a temporary relation? */
rel->rd_istemp = isTempNamespace(relnamespace);
@@ -2928,7 +2939,7 @@ RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
relation->rd_oidindex = oidIndex;
relation->rd_indexvalid = 2; /* mark list as forced */
/* must flag that we have a forced index list */
- RelationCacheResetAtEOXact();
+ need_eoxact_work = true;
}
/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 3a689599570..6c7c98b3f28 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.120 2007/01/25 02:17:26 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.121 2007/03/29 00:15:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -178,9 +178,6 @@ extern void simple_heap_delete(Relation relation, ItemPointer tid);
extern void simple_heap_update(Relation relation, ItemPointer otid,
HeapTuple tup);
-extern Oid fast_heap_insert(Relation relation, HeapTuple tup, bool use_wal);
-
-
extern void heap_markpos(HeapScanDesc scan);
extern void heap_restrpos(HeapScanDesc scan);
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index 3693379dba7..6cc0bdcbe8c 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,7 +6,7 @@
*
* Copyright (c) 2000-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/access/tuptoaster.h,v 1.32 2007/02/05 04:22:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/tuptoaster.h,v 1.33 2007/03/29 00:15:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -79,7 +79,8 @@
* ----------
*/
extern HeapTuple toast_insert_or_update(Relation rel,
- HeapTuple newtup, HeapTuple oldtup, bool use_wal);
+ HeapTuple newtup, HeapTuple oldtup,
+ bool use_wal, bool use_fsm);
/* ----------
* toast_delete -
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 2963cc6616a..33795de2bf8 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.99 2007/03/19 23:38:32 wieck Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.100 2007/03/29 00:15:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -138,13 +138,17 @@ typedef struct RelationData
char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 =
* valid, 2 = temporarily forced */
SubTransactionId rd_createSubid; /* rel was created in current xact */
- SubTransactionId rd_newRelfilenodeSubid; /* rel had new relfilenode in current xact */
+ SubTransactionId rd_newRelfilenodeSubid; /* new relfilenode assigned
+ * in current xact */
/*
* rd_createSubid is the ID of the highest subtransaction the rel has
* survived into; or zero if the rel was not created in the current top
* transaction. This should be relied on only for optimization purposes;
* it is possible for new-ness to be "forgotten" (eg, after CLUSTER).
+ * Likewise, rd_newRelfilenodeSubid is the ID of the highest subtransaction
+ * the relfilenode change has survived into, or zero if not changed in
+ * the current transaction (or we have forgotten changing it).
*/
Form_pg_class rd_rel; /* RELATION tuple */
TupleDesc rd_att; /* tuple descriptor */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index c7b549d1cff..25b60082a09 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.58 2007/03/03 20:08:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.59 2007/03/29 00:15:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -60,12 +60,12 @@ extern void RelationCacheInvalidateEntry(Oid relationId);
extern void RelationCacheInvalidate(void);
-extern void RelationCacheResetAtEOXact(void);
-
extern void AtEOXact_RelationCache(bool isCommit);
extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
SubTransactionId parentSubid);
+extern void RelationCacheMarkNewRelfilenode(Relation rel);
+
/*
* Routines to help manage rebuilding of relcache init file
*/