Diffstat (limited to 'src/backend/access')
-rw-r--r--  src/backend/access/gin/README         |   9
-rw-r--r--  src/backend/access/gin/ginvacuum.c    |  19
-rw-r--r--  src/backend/access/gist/gistvacuum.c  |  58
-rw-r--r--  src/backend/access/heap/README.HOT    |  23
-rw-r--r--  src/backend/access/heap/heapam.c      | 187
-rw-r--r--  src/backend/access/heap/pruneheap.c   | 135
-rw-r--r--  src/backend/access/nbtree/README      |  11
-rw-r--r--  src/backend/access/nbtree/nbtpage.c   |  13
-rw-r--r--  src/backend/access/nbtree/nbtree.c    |  49
-rw-r--r--  src/backend/access/nbtree/nbtxlog.c   |   6
-rw-r--r--  src/backend/access/transam/xact.c     |  33
-rw-r--r--  src/backend/access/transam/xlog.c     |  28
12 files changed, 107 insertions, 464 deletions
diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index af65efcb542..cd406935e05 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/access/gin/README,v 1.6 2008/07/08 03:25:42 neilc Exp $ +$PostgreSQL: pgsql/src/backend/access/gin/README,v 1.7 2010/02/08 04:33:52 tgl Exp $ Gin for PostgreSQL ================== @@ -98,13 +98,6 @@ We appreciate any comments, help and suggestions. * Teach optimizer/executor that GIN is intrinsically clustered. i.e., it always returns ItemPointer in ascending order. * Tweak gincostestimate. - * GIN stores several ItemPointer to heap tuple, so VACUUM FULL produces - this warning message: - - WARNING: index "idx" contains 88395 row versions, but table contains - 51812 row versions - HINT: Rebuild the index with REINDEX. - **** Workaround added TODO ---- diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 014db7fb739..a13b99cdfac 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.32 2010/01/02 16:57:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.33 2010/02/08 04:33:52 tgl Exp $ *------------------------------------------------------------------------- */ @@ -745,13 +745,9 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) stats->estimated_count = info->estimated_count; /* - * If vacuum full, we already have exclusive lock on the index. Otherwise, - * need lock unless it's local to this backend. + * Need lock unless it's local to this backend. */ - if (info->vacuum_full) - needLock = false; - else - needLock = !RELATION_IS_LOCAL(index); + needLock = !RELATION_IS_LOCAL(index); if (needLock) LockRelationForExtension(index, ExclusiveLock); @@ -785,15 +781,6 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) } lastBlock = npages - 1; - if (info->vacuum_full && lastBlock > lastFilledBlock) - { - /* try to truncate index */ - RelationTruncate(index, lastFilledBlock + 1); - - stats->pages_removed = lastBlock - lastFilledBlock; - totFreePages = totFreePages - stats->pages_removed; - } - /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index af26fb0311f..178ef40a35d 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.46 2010/01/02 16:57:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.47 2010/02/08 04:33:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,7 +29,7 @@ typedef struct GistBulkDeleteResult { IndexBulkDeleteResult std; /* common state */ - bool needFullVacuum; + bool needReindex; } GistBulkDeleteResult; typedef struct @@ -496,12 +496,8 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion) } /* - * For usual vacuum just update FSM, for full vacuum - * reforms parent tuples if some of childs was deleted or changed, - * update invalid tuples (they can exist from last crash recovery only), - * tries to get smaller index + * VACUUM cleanup: update FSM */ - Datum gistvacuumcleanup(PG_FUNCTION_ARGS) { @@ -533,47 
+529,15 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) */ } - /* gistVacuumUpdate may cause hard work */ - if (info->vacuum_full) - { - GistVacuum gv; - ArrayTuple res; - - /* note: vacuum.c already acquired AccessExclusiveLock on index */ - - gv.index = rel; - initGISTstate(&(gv.giststate), rel); - gv.opCtx = createTempGistContext(); - gv.result = stats; - gv.strategy = info->strategy; - - /* walk through the entire index for update tuples */ - res = gistVacuumUpdate(&gv, GIST_ROOT_BLKNO, false); - /* cleanup */ - if (res.itup) - { - int i; - - for (i = 0; i < res.ituplen; i++) - pfree(res.itup[i]); - pfree(res.itup); - } - freeGISTstate(&(gv.giststate)); - MemoryContextDelete(gv.opCtx); - } - else if (stats->needFullVacuum) + if (stats->needReindex) ereport(NOTICE, (errmsg("index \"%s\" needs VACUUM FULL or REINDEX to finish crash recovery", RelationGetRelationName(rel)))); /* - * If vacuum full, we already have exclusive lock on the index. Otherwise, - * need lock unless it's local to this backend. + * Need lock unless it's local to this backend. */ - if (info->vacuum_full) - needLock = false; - else - needLock = !RELATION_IS_LOCAL(rel); + needLock = !RELATION_IS_LOCAL(rel); /* try to find deleted pages */ if (needLock) @@ -606,14 +570,6 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) } lastBlock = npages - 1; - if (info->vacuum_full && lastFilledBlock < lastBlock) - { /* try to truncate index */ - RelationTruncate(rel, lastFilledBlock + 1); - - stats->std.pages_removed = lastBlock - lastFilledBlock; - totFreePages = totFreePages - stats->std.pages_removed; - } - /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); @@ -799,7 +755,7 @@ gistbulkdelete(PG_FUNCTION_ARGS) stack->next = ptr; if (GistTupleIsInvalid(idxtuple)) - stats->needFullVacuum = true; + stats->needReindex = true; } } diff --git a/src/backend/access/heap/README.HOT b/src/backend/access/heap/README.HOT index 76ac83722f5..fb3e9d31ad2 100644 --- a/src/backend/access/heap/README.HOT +++ b/src/backend/access/heap/README.HOT @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/access/heap/README.HOT,v 1.4 2008/10/02 20:59:31 momjian Exp $ +$PostgreSQL: pgsql/src/backend/access/heap/README.HOT,v 1.5 2010/02/08 04:33:52 tgl Exp $ Heap Only Tuples (HOT) ====================== @@ -255,27 +255,6 @@ dead heap-only tuples, and cleans up any dead line pointers as if they were regular dead tuples. -VACUUM FULL ------------ - -VACUUM FULL performs an extra operation of collapsing out redirecting line -pointers, by moving the first non-DEAD tuple of each HOT chain to the root -position and clearing its heap-only-tuple flag. This effectively changes -the user-visible CTID of that tuple. This would be completely unsafe -during normal concurrent operation, but since VACUUM FULL takes full -exclusive lock on the table, it should be OK. (Note that VACUUM FULL has -always felt free to change tuples' CTIDs by moving them across pages.) -Eliminating redirection links means that the main body of VACUUM FULL -doesn't have to deal with them, which seems a good thing since VACUUM FULL -is horrendously complex already. - -When VACUUM FULL tries to move tuple chains, it does not distinguish regular -and heap-only tuples, but just moves both types the same. This is OK because -it will move the entire non-DEAD tail of an update chain and remove index -entries for each item moved. At worst, we'll uselessly search for index -entries matching the heap-only tuples included in the move. 
- - Statistics ---------- diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0d9ad2af045..9983ff65c8a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.285 2010/02/03 10:01:29 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.286 2010/02/08 04:33:52 tgl Exp $ * * * INTERFACE ROUTINES @@ -79,7 +79,7 @@ static HeapScanDesc heap_beginscan_internal(Relation relation, bool allow_strat, bool allow_sync, bool is_bitmapscan); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, - ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool move, + ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool all_visible_cleared, bool new_all_visible_cleared); static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs, HeapTuple oldtup, HeapTuple newtup); @@ -2785,7 +2785,7 @@ l2: if (!relation->rd_istemp) { XLogRecPtr recptr = log_heap_update(relation, buffer, oldtup.t_self, - newbuf, heaptup, false, + newbuf, heaptup, all_visible_cleared, all_visible_cleared_new); @@ -3664,9 +3664,13 @@ recheck_xmax: } /* - * Although xvac per se could only be set by VACUUM, it shares physical - * storage space with cmax, and so could be wiped out by someone setting - * xmax. Hence recheck after changing lock, same as for xmax itself. + * Although xvac per se could only be set by old-style VACUUM FULL, it + * shares physical storage space with cmax, and so could be wiped out by + * someone setting xmax. Hence recheck after changing lock, same as for + * xmax itself. + * + * Old-style VACUUM FULL is gone, but we have to keep this code as long + * as we support having MOVED_OFF/MOVED_IN tuples in the database. */ recheck_xvac: if (tuple->t_infomask & HEAP_MOVED) @@ -3785,8 +3789,7 @@ HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId xmax = HeapTupleHeaderGetXmax(tuple); TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - if (tuple->t_infomask & HEAP_MOVED_OFF || - tuple->t_infomask & HEAP_MOVED_IN) + if (tuple->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*latestRemovedXid, xvac)) *latestRemovedXid = xvac; @@ -3844,7 +3847,7 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, - TransactionId latestRemovedXid, bool redirect_move) + TransactionId latestRemovedXid) { xl_heap_clean xlrec; uint8 info; @@ -3915,7 +3918,7 @@ log_heap_clean(Relation reln, Buffer buffer, rdata[3].buffer_std = true; rdata[3].next = NULL; - info = redirect_move ? XLOG_HEAP2_CLEAN_MOVE : XLOG_HEAP2_CLEAN; + info = XLOG_HEAP2_CLEAN; recptr = XLogInsert(RM_HEAP2_ID, info, rdata); return recptr; @@ -3970,23 +3973,11 @@ log_heap_freeze(Relation reln, Buffer buffer, */ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, - Buffer newbuf, HeapTuple newtup, bool move, + Buffer newbuf, HeapTuple newtup, bool all_visible_cleared, bool new_all_visible_cleared) { - /* - * Note: xlhdr is declared to have adequate size and correct alignment for - * an xl_heap_header. However the two tids, if present at all, will be - * packed in with no wasted space after the xl_heap_header; they aren't - * necessarily aligned as implied by this struct declaration. 
- */ - struct - { - xl_heap_header hdr; - TransactionId tid1; - TransactionId tid2; - } xlhdr; - int hsize = SizeOfHeapHeader; xl_heap_update xlrec; + xl_heap_header xlhdr; uint8 info; XLogRecPtr recptr; XLogRecData rdata[4]; @@ -3995,12 +3986,7 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, /* Caller should not call me on a temp relation */ Assert(!reln->rd_istemp); - if (move) - { - Assert(!HeapTupleIsHeapOnly(newtup)); - info = XLOG_HEAP_MOVE; - } - else if (HeapTupleIsHeapOnly(newtup)) + if (HeapTupleIsHeapOnly(newtup)) info = XLOG_HEAP_HOT_UPDATE; else info = XLOG_HEAP_UPDATE; @@ -4022,30 +4008,16 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, rdata[1].buffer_std = true; rdata[1].next = &(rdata[2]); - xlhdr.hdr.t_infomask2 = newtup->t_data->t_infomask2; - xlhdr.hdr.t_infomask = newtup->t_data->t_infomask; - xlhdr.hdr.t_hoff = newtup->t_data->t_hoff; - if (move) /* remember xmax & xmin */ - { - TransactionId xid[2]; /* xmax, xmin */ - - if (newtup->t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) - xid[0] = InvalidTransactionId; - else - xid[0] = HeapTupleHeaderGetXmax(newtup->t_data); - xid[1] = HeapTupleHeaderGetXmin(newtup->t_data); - memcpy((char *) &xlhdr + hsize, - (char *) xid, - 2 * sizeof(TransactionId)); - hsize += 2 * sizeof(TransactionId); - } + xlhdr.t_infomask2 = newtup->t_data->t_infomask2; + xlhdr.t_infomask = newtup->t_data->t_infomask; + xlhdr.t_hoff = newtup->t_data->t_hoff; /* * As with insert records, we need not store the rdata[2] segment if we * decide to store the whole buffer instead. */ rdata[2].data = (char *) &xlhdr; - rdata[2].len = hsize; + rdata[2].len = SizeOfHeapHeader; rdata[2].buffer = newbuf; rdata[2].buffer_std = true; rdata[2].next = &(rdata[3]); @@ -4071,19 +4043,6 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, } /* - * Perform XLogInsert for a heap-move operation. Caller must already - * have modified the buffers and marked them dirty. - */ -XLogRecPtr -log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from, - Buffer newbuf, HeapTuple newtup, - bool all_visible_cleared, bool new_all_visible_cleared) -{ - return log_heap_update(reln, oldbuf, from, newbuf, newtup, true, - all_visible_cleared, new_all_visible_cleared); -} - -/* * Perform XLogInsert of a HEAP_NEWPAGE record to WAL. Caller is responsible * for writing the page to disk after calling this routine. * @@ -4149,10 +4108,10 @@ heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record) } /* - * Handles CLEAN and CLEAN_MOVE record types + * Handles HEAP_CLEAN record type */ static void -heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) +heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record) { xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record); Buffer buffer; @@ -4171,7 +4130,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) * no queries running for which the removed tuples are still visible. 
*/ if (InHotStandby) - ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node); + ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, + xlrec->node); RestoreBkpBlocks(lsn, record, true); @@ -4203,8 +4163,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) heap_page_prune_execute(buffer, redirected, nredirected, nowdead, ndead, - nowunused, nunused, - clean_move); + nowunused, nunused); freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ @@ -4489,10 +4448,10 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) } /* - * Handles UPDATE, HOT_UPDATE & MOVE + * Handles UPDATE and HOT_UPDATE */ static void -heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update) +heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) { xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); Buffer buffer; @@ -4558,33 +4517,19 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update) htup = (HeapTupleHeader) PageGetItem(page, lp); - if (move) - { - htup->t_infomask &= ~(HEAP_XMIN_COMMITTED | - HEAP_XMIN_INVALID | - HEAP_MOVED_IN); - htup->t_infomask |= HEAP_MOVED_OFF; - HeapTupleHeaderClearHotUpdated(htup); - HeapTupleHeaderSetXvac(htup, record->xl_xid); - /* Make sure there is no forward chain link in t_ctid */ - htup->t_ctid = xlrec->target.tid; - } + htup->t_infomask &= ~(HEAP_XMAX_COMMITTED | + HEAP_XMAX_INVALID | + HEAP_XMAX_IS_MULTI | + HEAP_IS_LOCKED | + HEAP_MOVED); + if (hot_update) + HeapTupleHeaderSetHotUpdated(htup); else - { - htup->t_infomask &= ~(HEAP_XMAX_COMMITTED | - HEAP_XMAX_INVALID | - HEAP_XMAX_IS_MULTI | - HEAP_IS_LOCKED | - HEAP_MOVED); - if (hot_update) - HeapTupleHeaderSetHotUpdated(htup); - else - HeapTupleHeaderClearHotUpdated(htup); - HeapTupleHeaderSetXmax(htup, record->xl_xid); - HeapTupleHeaderSetCmax(htup, FirstCommandId, false); - /* Set forward chain link in t_ctid */ - htup->t_ctid = xlrec->newtid; - } + HeapTupleHeaderClearHotUpdated(htup); + HeapTupleHeaderSetXmax(htup, record->xl_xid); + HeapTupleHeaderSetCmax(htup, FirstCommandId, false); + /* Set forward chain link in t_ctid */ + htup->t_ctid = xlrec->newtid; /* Mark the page as a candidate for pruning */ PageSetPrunable(page, record->xl_xid); @@ -4655,8 +4600,6 @@ newsame:; elog(PANIC, "heap_update_redo: invalid max offset number"); hsize = SizeOfHeapUpdate + SizeOfHeapHeader; - if (move) - hsize += (2 * sizeof(TransactionId)); newlen = record->xl_len - hsize; Assert(newlen <= MaxHeapTupleSize); @@ -4674,22 +4617,8 @@ newsame:; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - if (move) - { - TransactionId xid[2]; /* xmax, xmin */ - - memcpy((char *) xid, - (char *) xlrec + SizeOfHeapUpdate + SizeOfHeapHeader, - 2 * sizeof(TransactionId)); - HeapTupleHeaderSetXmin(htup, xid[1]); - HeapTupleHeaderSetXmax(htup, xid[0]); - HeapTupleHeaderSetXvac(htup, record->xl_xid); - } - else - { - HeapTupleHeaderSetXmin(htup, record->xl_xid); - HeapTupleHeaderSetCmin(htup, FirstCommandId); - } + HeapTupleHeaderSetXmin(htup, record->xl_xid); + HeapTupleHeaderSetCmin(htup, FirstCommandId); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = xlrec->newtid; @@ -4857,13 +4786,10 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record) heap_xlog_delete(lsn, record); break; case XLOG_HEAP_UPDATE: - heap_xlog_update(lsn, record, false, false); - break; - case XLOG_HEAP_MOVE: - heap_xlog_update(lsn, record, true, false); + heap_xlog_update(lsn, record, false); break; 
case XLOG_HEAP_HOT_UPDATE: - heap_xlog_update(lsn, record, false, true); + heap_xlog_update(lsn, record, true); break; case XLOG_HEAP_NEWPAGE: heap_xlog_newpage(lsn, record); @@ -4895,10 +4821,7 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record) heap_xlog_freeze(lsn, record); break; case XLOG_HEAP2_CLEAN: - heap_xlog_clean(lsn, record, false); - break; - case XLOG_HEAP2_CLEAN_MOVE: - heap_xlog_clean(lsn, record, true); + heap_xlog_clean(lsn, record); break; case XLOG_HEAP2_CLEANUP_INFO: heap_xlog_cleanup_info(lsn, record); @@ -4953,19 +4876,6 @@ heap_desc(StringInfo buf, uint8 xl_info, char *rec) ItemPointerGetBlockNumber(&(xlrec->newtid)), ItemPointerGetOffsetNumber(&(xlrec->newtid))); } - else if (info == XLOG_HEAP_MOVE) - { - xl_heap_update *xlrec = (xl_heap_update *) rec; - - if (xl_info & XLOG_HEAP_INIT_PAGE) - appendStringInfo(buf, "move(init): "); - else - appendStringInfo(buf, "move: "); - out_target(buf, &(xlrec->target)); - appendStringInfo(buf, "; new %u/%u", - ItemPointerGetBlockNumber(&(xlrec->newtid)), - ItemPointerGetOffsetNumber(&(xlrec->newtid))); - } else if (info == XLOG_HEAP_HOT_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; @@ -5037,15 +4947,6 @@ heap2_desc(StringInfo buf, uint8 xl_info, char *rec) xlrec->node.relNode, xlrec->block, xlrec->latestRemovedXid); } - else if (info == XLOG_HEAP2_CLEAN_MOVE) - { - xl_heap_clean *xlrec = (xl_heap_clean *) rec; - - appendStringInfo(buf, "clean_move: rel %u/%u/%u; blk %u remxid %u", - xlrec->node.spcNode, xlrec->node.dbNode, - xlrec->node.relNode, xlrec->block, - xlrec->latestRemovedXid); - } else if (info == XLOG_HEAP2_CLEANUP_INFO) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 3d7c5c7c0be..9d6a737277b 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.20 2010/01/02 16:57:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.21 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,7 +21,6 @@ #include "pgstat.h" #include "storage/bufmgr.h" #include "storage/off.h" -#include "utils/inval.h" #include "utils/rel.h" #include "utils/tqual.h" @@ -46,8 +45,7 @@ typedef struct static int heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, - PruneState *prstate, - bool redirect_move); + PruneState *prstate); static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid); static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum); @@ -123,8 +121,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) */ if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree) { - /* OK to prune (though not to remove redirects) */ - (void) heap_page_prune(relation, buffer, OldestXmin, false, true); + /* OK to prune */ + (void) heap_page_prune(relation, buffer, OldestXmin, true); } /* And release buffer lock */ @@ -141,14 +139,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum). 
* - * If redirect_move is set, we remove redirecting line pointers by - * updating the root line pointer to point directly to the first non-dead - * tuple in the chain. NOTE: eliminating the redirect changes the first - * tuple's effective CTID, and is therefore unsafe except within VACUUM FULL. - * The only reason we support this capability at all is that by using it, - * VACUUM FULL need not cope with LP_REDIRECT items at all; which seems a - * good thing since VACUUM FULL is overly complicated already. - * * If report_stats is true then we send the number of reclaimed heap-only * tuples to pgstats. (This must be FALSE during vacuum, since vacuum will * send its own new total to pgstats, and we don't want this delta applied @@ -158,7 +148,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) */ int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, - bool redirect_move, bool report_stats) + bool report_stats) { int ndeleted = 0; Page page = BufferGetPage(buffer); @@ -172,17 +162,10 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, * logic as possible out of the critical section, and also ensures that * WAL replay will work the same as the normal case. * - * First, inform inval.c that upcoming CacheInvalidateHeapTuple calls are - * nontransactional. - */ - if (redirect_move) - BeginNonTransactionalInvalidation(); - - /* - * Initialize the new pd_prune_xid value to zero (indicating no prunable - * tuples). If we find any tuples which may soon become prunable, we will - * save the lowest relevant XID in new_prune_xid. Also initialize the rest - * of our working state. + * First, initialize the new pd_prune_xid value to zero (indicating no + * prunable tuples). If we find any tuples which may soon become + * prunable, we will save the lowest relevant XID in new_prune_xid. + * Also initialize the rest of our working state. */ prstate.new_prune_xid = InvalidTransactionId; prstate.latestRemovedXid = InvalidTransactionId; @@ -209,22 +192,9 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, /* Process this item or chain of items */ ndeleted += heap_prune_chain(relation, buffer, offnum, OldestXmin, - &prstate, - redirect_move); + &prstate); } - /* - * Send invalidation messages for any tuples we are about to move. It is - * safe to do this now, even though we could theoretically still fail - * before making the actual page update, because a useless cache - * invalidation doesn't hurt anything. Also, no one else can reload the - * tuples while we have exclusive buffer lock, so it's not too early to - * send the invals. This avoids sending the invals while inside the - * critical section, which is a good thing for robustness. 
- */ - if (redirect_move) - EndNonTransactionalInvalidation(); - /* Any error while applying the changes is critical */ START_CRIT_SECTION(); @@ -238,8 +208,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, heap_page_prune_execute(buffer, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, - prstate.nowunused, prstate.nunused, - redirect_move); + prstate.nowunused, prstate.nunused); /* * Update the page's pd_prune_xid field to either zero, or the lowest @@ -257,7 +226,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, MarkBufferDirty(buffer); /* - * Emit a WAL HEAP_CLEAN or HEAP_CLEAN_MOVE record showing what we did + * Emit a WAL HEAP_CLEAN record showing what we did */ if (!relation->rd_istemp) { @@ -267,7 +236,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, prstate.nowunused, prstate.nunused, - prstate.latestRemovedXid, redirect_move); + prstate.latestRemovedXid); PageSetLSN(BufferGetPage(buffer), recptr); PageSetTLI(BufferGetPage(buffer), ThisTimeLineID); @@ -349,16 +318,12 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED * state are added to nowunused[]. * - * If redirect_move is true, we intend to get rid of redirecting line pointers, - * not just make redirection entries. - * * Returns the number of tuples (to be) deleted from the page. */ static int heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, - PruneState *prstate, - bool redirect_move) + PruneState *prstate) { int ndeleted = 0; Page dp = (Page) BufferGetPage(buffer); @@ -366,7 +331,6 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, ItemId rootlp; HeapTupleHeader htup; OffsetNumber latestdead = InvalidOffsetNumber, - redirect_target = InvalidOffsetNumber, maxoff = PageGetMaxOffsetNumber(dp), offnum; OffsetNumber chainitems[MaxHeapTuplesPerPage]; @@ -592,12 +556,7 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, if (i >= nchain) heap_prune_record_dead(prstate, rootoffnum); else - { heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]); - /* If the redirection will be a move, need more processing */ - if (redirect_move) - redirect_target = chainitems[i]; - } } else if (nchain < 2 && ItemIdIsRedirected(rootlp)) { @@ -610,42 +569,6 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, */ heap_prune_record_dead(prstate, rootoffnum); } - else if (redirect_move && ItemIdIsRedirected(rootlp)) - { - /* - * If we desire to eliminate LP_REDIRECT items by moving tuples, make - * a redirection entry for each redirected root item; this will cause - * heap_page_prune_execute to actually do the move. (We get here only - * when there are no DEAD tuples in the chain; otherwise the - * redirection entry was made above.) - */ - heap_prune_record_redirect(prstate, rootoffnum, chainitems[1]); - redirect_target = chainitems[1]; - } - - /* - * If we are going to implement a redirect by moving tuples, we have to - * issue a cache invalidation against the redirection target tuple, - * because its CTID will be effectively changed by the move. 
Note that - * CacheInvalidateHeapTuple only queues the request, it doesn't send it; - * if we fail before reaching EndNonTransactionalInvalidation, nothing - * happens and no harm is done. - */ - if (OffsetNumberIsValid(redirect_target)) - { - ItemId firstlp = PageGetItemId(dp, redirect_target); - HeapTupleData firsttup; - - Assert(ItemIdIsNormal(firstlp)); - /* Set up firsttup to reference the tuple at its existing CTID */ - firsttup.t_data = (HeapTupleHeader) PageGetItem(dp, firstlp); - firsttup.t_len = ItemIdGetLength(firstlp); - ItemPointerSet(&firsttup.t_self, - BufferGetBlockNumber(buffer), - redirect_target); - firsttup.t_tableOid = RelationGetRelid(relation); - CacheInvalidateHeapTuple(relation, &firsttup); - } return ndeleted; } @@ -715,14 +638,13 @@ void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused, - bool redirect_move) + OffsetNumber *nowunused, int nunused) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; int i; - /* Update all redirected or moved line pointers */ + /* Update all redirected line pointers */ offnum = redirected; for (i = 0; i < nredirected; i++) { @@ -730,30 +652,7 @@ heap_page_prune_execute(Buffer buffer, OffsetNumber tooff = *offnum++; ItemId fromlp = PageGetItemId(page, fromoff); - if (redirect_move) - { - /* Physically move the "to" item to the "from" slot */ - ItemId tolp = PageGetItemId(page, tooff); - HeapTupleHeader htup; - - *fromlp = *tolp; - ItemIdSetUnused(tolp); - - /* - * Change heap-only status of the tuple because after the line - * pointer manipulation, it's no longer a heap-only tuple, but is - * directly pointed to by index entries. - */ - Assert(ItemIdIsNormal(fromlp)); - htup = (HeapTupleHeader) PageGetItem(page, fromlp); - Assert(HeapTupleHeaderIsHeapOnly(htup)); - HeapTupleHeaderClearHeapOnly(htup); - } - else - { - /* Just insert a REDIRECT link at fromoff */ - ItemIdSetRedirect(fromlp, tooff); - } + ItemIdSetRedirect(fromlp, tooff); } /* Update all now-dead line pointers */ diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index e53315a83fb..57d6308adac 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.21 2009/12/19 01:32:32 sriggs Exp $ +$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.22 2010/02/08 04:33:53 tgl Exp $ Btree Indexing ============== @@ -171,9 +171,9 @@ We consider deleting an entire page from the btree only when it's become completely empty of items. (Merging partly-full pages would allow better space reuse, but it seems impractical to move existing data items left or right to make this happen --- a scan moving in the opposite direction -might miss the items if so. We could do it during VACUUM FULL, though.) -Also, we *never* delete the rightmost page on a tree level (this -restriction simplifies the traversal algorithms, as explained below). +might miss the items if so.) Also, we *never* delete the rightmost page +on a tree level (this restriction simplifies the traversal algorithms, as +explained below). To delete an empty page, we acquire write lock on its left sibling (if any), the target page itself, the right sibling (there must be one), and @@ -266,8 +266,7 @@ transactions that were running at the time of deletion are dead; which is overly strong, but is simple to implement within Postgres. 
When marked dead, a deleted page is labeled with the next-transaction counter value. VACUUM can reclaim the page for re-use when this transaction number is -older than the oldest open transaction. (NOTE: VACUUM FULL can reclaim -such pages immediately.) +older than the oldest open transaction. Reclaiming a page doesn't actually change its state on disk --- we simply record it in the shared-memory free space map, from which it will be diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 71b23644e2b..b0eff770d0b 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.117 2010/02/01 13:40:28 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -877,7 +877,7 @@ _bt_parent_deletion_safe(Relation rel, BlockNumber target, BTStack stack) * frequently. */ int -_bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) +_bt_pagedel(Relation rel, Buffer buf, BTStack stack) { int result; BlockNumber target, @@ -1207,14 +1207,13 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) /* * Mark the page itself deleted. It can be recycled when all current - * transactions are gone; or immediately if we're doing VACUUM FULL. + * transactions are gone. */ page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque->btpo_flags &= ~BTP_HALF_DEAD; opaque->btpo_flags |= BTP_DELETED; - opaque->btpo.xact = - vacuum_full ? FrozenTransactionId : ReadNewTransactionId(); + opaque->btpo.xact = ReadNewTransactionId(); /* And update the metapage, if needed */ if (BufferIsValid(metabuf)) @@ -1350,7 +1349,7 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) { /* recursive call will release pbuf */ _bt_relbuf(rel, rbuf); - result = _bt_pagedel(rel, pbuf, stack->bts_parent, vacuum_full) + 1; + result = _bt_pagedel(rel, pbuf, stack->bts_parent) + 1; _bt_relbuf(rel, buf); } else if (parent_one_child && rightsib_empty) @@ -1358,7 +1357,7 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) _bt_relbuf(rel, pbuf); _bt_relbuf(rel, buf); /* recursive call will release rbuf */ - result = _bt_pagedel(rel, rbuf, stack, vacuum_full) + 1; + result = _bt_pagedel(rel, rbuf, stack) + 1; } else { diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index f37cc8e26bc..b0acaf257f2 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.174 2010/01/02 16:57:35 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.175 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -579,12 +579,12 @@ btvacuumcleanup(PG_FUNCTION_ARGS) IndexFreeSpaceMapVacuum(info->index); /* - * During a non-FULL vacuum it's quite possible for us to be fooled by - * concurrent page splits into double-counting some index tuples, so - * disbelieve any total that exceeds the underlying heap's count ... if we - * know that accurately. Otherwise this might just make matters worse. 
+ * It's quite possible for us to be fooled by concurrent page splits into + * double-counting some index tuples, so disbelieve any total that exceeds + * the underlying heap's count ... if we know that accurately. Otherwise + * this might just make matters worse. */ - if (!info->vacuum_full && !info->estimated_count) + if (!info->estimated_count) { if (stats->num_index_tuples > info->num_heap_tuples) stats->num_index_tuples = info->num_heap_tuples; @@ -687,27 +687,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, } /* - * During VACUUM FULL, we truncate off any recyclable pages at the end of - * the index. In a normal vacuum it'd be unsafe to do this except by - * acquiring exclusive lock on the index and then rechecking all the - * pages; doesn't seem worth it. - */ - if (info->vacuum_full && vstate.lastUsedPage < num_pages - 1) - { - BlockNumber new_pages = vstate.lastUsedPage + 1; - - /* - * Okay to truncate. - */ - RelationTruncate(rel, new_pages); - - /* update statistics */ - stats->pages_removed += num_pages - new_pages; - vstate.totFreePages -= (num_pages - new_pages); - num_pages = new_pages; - } - - /* * InHotStandby we need to scan right up to the end of the index for * correct locking, so we may need to write a WAL record for the final * block in the index if it was not vacuumed. It's possible that VACUUMing @@ -963,26 +942,12 @@ restart: MemoryContextReset(vstate->pagedelcontext); oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext); - ndel = _bt_pagedel(rel, buf, NULL, info->vacuum_full); + ndel = _bt_pagedel(rel, buf, NULL); /* count only this page, else may double-count parent */ if (ndel) stats->pages_deleted++; - /* - * During VACUUM FULL it's okay to recycle deleted pages immediately, - * since there can be no other transactions scanning the index. Note - * that we will only recycle the current page and not any parent pages - * that _bt_pagedel might have recursed to; this seems reasonable in - * the name of simplicity. (Trying to do otherwise would mean we'd - * have to sort the list of recyclable pages we're building.) 
- */ - if (ndel && info->vacuum_full) - { - RecordFreeIndexPage(rel, blkno); - vstate->totFreePages++; - } - MemoryContextSwitchTo(oldcontext); /* pagedel released buffer, so we shouldn't */ } diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index f83b3188125..83a7c98c14e 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.59 2010/01/29 17:10:05 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1079,8 +1079,8 @@ btree_xlog_cleanup(void) Relation reln; reln = CreateFakeRelcacheEntry(action->node); - if (_bt_pagedel(reln, buf, NULL, true) == 0) - elog(PANIC, "btree_xlog_cleanup: _bt_pagdel failed"); + if (_bt_pagedel(reln, buf, NULL) == 0) + elog(PANIC, "btree_xlog_cleanup: _bt_pagedel failed"); FreeFakeRelcacheEntry(reln); } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 156ed5c47be..27ce9ac4c30 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.283 2010/02/07 20:48:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.284 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -881,11 +881,9 @@ AtSubStart_ResourceOwner(void) * * Returns latest XID among xact and its children, or InvalidTransactionId * if the xact has no XID. (We compute that here just because it's easier.) - * - * This is exported only to support an ugly hack in VACUUM FULL. */ -TransactionId -RecordTransactionCommit(bool isVacuumFull) +static TransactionId +RecordTransactionCommit(void) { TransactionId xid = GetTopTransactionIdIfAny(); bool markXidCommitted = TransactionIdIsValid(xid); @@ -950,8 +948,6 @@ RecordTransactionCommit(bool isVacuumFull) xlrec.xinfo = 0; if (RelcacheInitFileInval) xlrec.xinfo |= XACT_COMPLETION_UPDATE_RELCACHE_FILE; - if (isVacuumFull) - xlrec.xinfo |= XACT_COMPLETION_VACUUM_FULL; if (forceSyncCommit) xlrec.xinfo |= XACT_COMPLETION_FORCE_SYNC_COMMIT; @@ -1755,7 +1751,7 @@ CommitTransaction(void) /* * Here is where we really truly commit. */ - latestXid = RecordTransactionCommit(false); + latestXid = RecordTransactionCommit(); TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid); @@ -4374,28 +4370,23 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn) LWLockRelease(XidGenLock); } - if (!InHotStandby || XactCompletionVacuumFull(xlrec)) + if (!InHotStandby) { /* * Mark the transaction committed in pg_clog. - * - * If InHotStandby and this is the first commit of a VACUUM FULL INPLACE - * we perform only the actual commit to clog. Strangely, there are two - * commits that share the same xid for every VFI, so we need to skip - * some steps for the first commit. It's OK to repeat the clog update - * when we see the second commit on a VFI. */ TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids); } else { /* - * If a transaction completion record arrives that has as-yet unobserved - * subtransactions then this will not have been fully handled by the call - * to RecordKnownAssignedTransactionIds() in the main recovery loop in - * xlog.c. 
So we need to do bookkeeping again to cover that case. This is - * confusing and it is easy to think this call is irrelevant, which has - * happened three times in development already. Leave it in. + * If a transaction completion record arrives that has as-yet + * unobserved subtransactions then this will not have been fully + * handled by the call to RecordKnownAssignedTransactionIds() in the + * main recovery loop in xlog.c. So we need to do bookkeeping again to + * cover that case. This is confusing and it is easy to think this + * call is irrelevant, which has happened three times in development + * already. Leave it in. */ RecordKnownAssignedTransactionIds(max_xid); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f4b03f4c1be..067827abeb6 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.367 2010/02/07 20:48:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.368 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2107,32 +2107,6 @@ XLogBackgroundFlush(void) } /* - * Flush any previous asynchronously-committed transactions' commit records. - * - * NOTE: it is unwise to assume that this provides any strong guarantees. - * In particular, because of the inexact LSN bookkeeping used by clog.c, - * we cannot assume that hint bits will be settable for these transactions. - */ -void -XLogAsyncCommitFlush(void) -{ - XLogRecPtr WriteRqstPtr; - - /* use volatile pointer to prevent code rearrangement */ - volatile XLogCtlData *xlogctl = XLogCtl; - - /* There's no asynchronously committed transactions during recovery */ - if (RecoveryInProgress()) - return; - - SpinLockAcquire(&xlogctl->info_lck); - WriteRqstPtr = xlogctl->asyncCommitLSN; - SpinLockRelease(&xlogctl->info_lck); - - XLogFlush(WriteRqstPtr); -} - -/* * Test whether XLOG data has been flushed up to (at least) the given position. * * Returns true if a flush is still needed. (It may be that someone else |
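
Taken together, the hunks above remove every trace of old-style (in-place) VACUUM FULL from the access-method code. The GIN and GiST cleanup functions carry the same simplification: old-style VACUUM FULL arrived already holding AccessExclusiveLock, so both ginvacuumcleanup() and gistvacuumcleanup() special-cased it when deciding whether to take the relation-extension lock. With that case gone, the decision collapses to a single test. A condensed sketch of the shared pattern, assembled from the hunks above (the surrounding free-page scan is omitted):

    /* Need lock unless the index is local to this backend. */
    needLock = !RELATION_IS_LOCAL(index);

    if (needLock)
        LockRelationForExtension(index, ExclusiveLock);

    /* ... count free pages and vacuum the FSM ... */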
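
In heapam.c, the XLOG_HEAP_MOVE and XLOG_HEAP2_CLEAN_MOVE record types lose their only producer and are retired, so log_heap_move() is deleted and the redo routines drop their move/clean_move flags. The redo dispatch reduces to the following (a sketch condensed from the heap_redo and heap2_redo hunks above):

    /* heap_redo(): the XLOG_HEAP_MOVE case is gone */
    case XLOG_HEAP_UPDATE:
        heap_xlog_update(lsn, record, false);   /* regular update */
        break;
    case XLOG_HEAP_HOT_UPDATE:
        heap_xlog_update(lsn, record, true);    /* HOT update */
        break;

    /* heap2_redo(): likewise no XLOG_HEAP2_CLEAN_MOVE */
    case XLOG_HEAP2_CLEAN:
        heap_xlog_clean(lsn, record);
        break;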
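
The pruning API in pruneheap.c is simplified in the same way: redirect_move existed only so VACUUM FULL could collapse LP_REDIRECT items, and removing it also lets the nontransactional cache-invalidation calls go. A before/after sketch of the signatures involved, taken from the prototypes visible in the hunks above:

    /* Before this patch: */
    int heap_page_prune(Relation relation, Buffer buffer,
                        TransactionId OldestXmin,
                        bool redirect_move, bool report_stats);

    /* After: pruning only ever installs redirect pointers. */
    int heap_page_prune(Relation relation, Buffer buffer,
                        TransactionId OldestXmin,
                        bool report_stats);

    /* heap_page_prune_opt() therefore calls simply: */
    if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
        (void) heap_page_prune(relation, buffer, OldestXmin, true);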
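
For btrees, _bt_pagedel() no longer needs to know whether it runs inside VACUUM FULL; a deleted page is now always stamped with ReadNewTransactionId() rather than FrozenTransactionId, and recycling waits for that XID to become older than the oldest open transaction. The new call pattern, condensed from the nbtree.c and nbtxlog.c hunks above:

    /* old: _bt_pagedel(rel, buf, stack, vacuum_full); the flag is gone */
    int _bt_pagedel(Relation rel, Buffer buf, BTStack stack);

    /* bulk-delete scan in nbtree.c */
    ndel = _bt_pagedel(rel, buf, NULL);
    if (ndel)
        stats->pages_deleted++;     /* count only this page */

    /* crash-recovery cleanup in nbtxlog.c */
    if (_bt_pagedel(reln, buf, NULL) == 0)
        elog(PANIC, "btree_xlog_cleanup: _bt_pagedel failed");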
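
Finally, RecordTransactionCommit() was exported only to support VACUUM FULL's double-commit hack, so it becomes file-private again and loses its isVacuumFull argument, and commit records no longer carry XACT_COMPLETION_VACUUM_FULL. Condensed from the xact.c hunks above:

    /* old: extern TransactionId RecordTransactionCommit(bool isVacuumFull);
     * new: static, with no special case for VACUUM FULL */
    static TransactionId RecordTransactionCommit(void);

    /* CommitTransaction() now does simply: */
    latestXid = RecordTransactionCommit();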