diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/access/heap/heapam.c | 11 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtinsert.c | 156 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtpage.c | 47 |
3 files changed, 177 insertions, 37 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 4bb638d6c93..dbcefbf2733 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.85 2000/09/07 09:58:34 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.86 2000/10/04 00:04:41 vadim Exp $ * * * INTERFACE ROUTINES @@ -86,8 +86,8 @@ #include "utils/inval.h" #include "utils/relcache.h" -#ifdef XLOG /* comments are in _heap_update */ -static ItemPointerData _locked_tuple; +#ifdef XLOG /* comments are in heap_update */ +static xl_heaptid _locked_tuple_; #endif @@ -1650,8 +1650,9 @@ l2: * In the event of crash prio logging, TQUAL routines will see * HEAP_XMAX_UNLOGGED flag... */ - _locked_tuple = *otid; - XactPushRollback(_heap_unlock_tuple, (void*) &_locked_tuple); + _locked_tuple_.node = relation->rd_node; + _locked_tuple_.tid = *otid; + XactPushRollback(_heap_unlock_tuple, (void*) &_locked_tuple_); #endif TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax)); oldtup.t_data->t_cmax = GetCurrentCommandId(); diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 1dc945d9d96..ddaaa5f304c 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.62 2000/08/25 23:13:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.63 2000/10/04 00:04:42 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ typedef struct int best_delta; /* best size delta so far */ } FindSplitData; +void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf); static TransactionId _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, Buffer buf, @@ -54,7 +55,6 @@ static void _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright, int leftfree, int rightfree, bool newitemonleft, Size firstrightitemsz); static Buffer _bt_getstackbuf(Relation rel, BTStack stack); -static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf); static void _bt_pgaddtup(Relation rel, Page page, Size itemsize, BTItem btitem, OffsetNumber itup_off, const char *where); @@ -514,6 +514,29 @@ _bt_insertonpg(Relation rel, } else { +#ifdef XLOG + /* XLOG stuff */ + { + char xlbuf[sizeof(xl_btree_insert) + 2 * sizeof(CommandId)]; + xl_btree_insert *xlrec = xlbuf; + int hsize = SizeOfBtreeInsert; + + xlrec->target.node = rel->rd_node; + ItemPointerSet(&(xlrec->target.tid), BufferGetBlockNumber(buf), newitemoff); + if (P_ISLEAF(lpageop)) + { + CommandId cid = GetCurrentCommandId(); + memcpy(xlbuf + SizeOfBtreeInsert, &(char*)cid, sizeof(CommandId)); + hsize += sizeof(CommandId); + } + + XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_INSERT, + xlbuf, hsize, (char*) btitem, itemsz); + + PageSetLSN(page, recptr); + PageSetSUI(page, ThisStartUpID); + } +#endif _bt_pgaddtup(rel, page, itemsz, btitem, newitemoff, "page"); itup_off = newitemoff; itup_blkno = BufferGetBlockNumber(buf); @@ -578,8 +601,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage); /* if we're splitting this page, it won't be the root when we're done */ - oopaque->btpo_flags &= ~BTP_ROOT; - lopaque->btpo_flags = ropaque->btpo_flags = oopaque->btpo_flags; + lopaque->btpo_flags = oopaque->btpo_flags; + lopaque->btpo_flags &= ~BTP_ROOT; + ropaque->btpo_flags = lopaque->btpo_flags; lopaque->btpo_prev = oopaque->btpo_prev; lopaque->btpo_next = BufferGetBlockNumber(rbuf); ropaque->btpo_prev = BufferGetBlockNumber(buf); @@ -608,7 +632,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, item = (BTItem) PageGetItem(origpage, itemid); if (PageAddItem(rightpage, (Item) item, itemsz, rightoff, LP_USED) == InvalidOffsetNumber) - elog(FATAL, "btree: failed to add hikey to the right sibling"); + elog(STOP, "btree: failed to add hikey to the right sibling"); rightoff = OffsetNumberNext(rightoff); } @@ -633,7 +657,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, } if (PageAddItem(leftpage, (Item) item, itemsz, leftoff, LP_USED) == InvalidOffsetNumber) - elog(FATAL, "btree: failed to add hikey to the left sibling"); + elog(STOP, "btree: failed to add hikey to the left sibling"); leftoff = OffsetNumberNext(leftoff); /* @@ -705,6 +729,75 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, } /* + * We have to grab the right sibling (if any) and fix the prev + * pointer there. We are guaranteed that this is deadlock-free + * since no other writer will be holding a lock on that page + * and trying to move left, and all readers release locks on a page + * before trying to fetch its neighbors. + */ + + if (!P_RIGHTMOST(ropaque)) + { + sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE); + spage = BufferGetPage(sbuf); + } + +#ifdef XLOG + /* + * Right sibling is locked, new siblings are prepared, but original + * page is not updated yet. Log changes before continuing. + * + * NO ELOG(ERROR) till right sibling is updated. + * + */ + { + char xlbuf[sizeof(xl_btree_split) + + 2 * sizeof(CommandId) + BLCKSZ]; + xl_btree_split *xlrec = xlbuf; + int hsize = SizeOfBtreeSplit; + int flag = (newitemonleft) ? + XLOG_BTREE_SPLEFT : XLOG_BTREE_SPLIT; + + xlrec->target.node = rel->rd_node; + ItemPointerSet(&(xlrec->target.tid), itup_blkno, itup_off); + if (P_ISLEAF(lopaque)) + { + CommandId cid = GetCurrentCommandId(); + memcpy(xlbuf + hsize, &(char*)cid, sizeof(CommandId)); + hsize += sizeof(CommandId); + } + if (newitemonleft) + { + memcpy(xlbuf + hsize, (char*) newitem, newitemsz); + hsize += newitemsz; + xlrec->otherblk = BufferGetBlockNumber(rbuf); + } + else + xlrec->otherblk = BufferGetBlockNumber(buf); + + xlrec->rightblk = ropaque->btpo_next; + + /* + * Dirrect access to page is not good but faster - we should + * implement some new func in page API. + */ + XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, flag, xlbuf, + hsize, (char*)rightpage + (PageHeader) rightpage)->pd_upper, + ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->upper); + + PageSetLSN(leftpage, recptr); + PageSetSUI(leftpage, ThisStartUpID); + PageSetLSN(rightpage, recptr); + PageSetSUI(rightpage, ThisStartUpID); + if (!P_RIGHTMOST(ropaque)) + { + PageSetLSN(spage, recptr); + PageSetSUI(spage, ThisStartUpID); + } + } +#endif + + /* * By here, the original data page has been split into two new halves, * and these are correct. The algorithm requires that the left page * never move during a split, so we copy the new left page back on top @@ -716,18 +809,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, PageRestoreTempPage(leftpage, origpage); - /* - * Finally, we need to grab the right sibling (if any) and fix the - * prev pointer there. We are guaranteed that this is deadlock-free - * since no other writer will be holding a lock on that page - * and trying to move left, and all readers release locks on a page - * before trying to fetch its neighbors. - */ - if (!P_RIGHTMOST(ropaque)) { - sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE); - spage = BufferGetPage(sbuf); sopaque = (BTPageOpaque) PageGetSpecialPointer(spage); sopaque->btpo_prev = BufferGetBlockNumber(rbuf); @@ -1002,7 +1085,7 @@ _bt_getstackbuf(Relation rel, BTStack stack) * two new children. The new root page is neither pinned nor locked, and * we have also written out lbuf and rbuf and dropped their pins/locks. */ -static void +void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) { Buffer rootbuf; @@ -1011,7 +1094,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) rootpage; BlockNumber lbkno, rbkno; - BlockNumber rootbknum; + BlockNumber rootblknum; BTPageOpaque rootopaque; ItemId itemid; BTItem item; @@ -1021,12 +1104,16 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) /* get a new root page */ rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootpage = BufferGetPage(rootbuf); - rootbknum = BufferGetBlockNumber(rootbuf); + rootblknum = BufferGetBlockNumber(rootbuf); + + + /* NO ELOG(ERROR) from here till newroot op is logged */ /* set btree special data */ rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE; rootopaque->btpo_flags |= BTP_ROOT; + rootopaque->btpo_parent = BTREE_METAPAGE; lbkno = BufferGetBlockNumber(lbuf); rbkno = BufferGetBlockNumber(rbuf); @@ -1040,7 +1127,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) */ ((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo_parent = ((BTPageOpaque) PageGetSpecialPointer(rpage))->btpo_parent = - rootbknum; + rootblknum; /* * Create downlink item for left page (old root). Since this will be @@ -1058,7 +1145,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) * the two items will go into positions P_HIKEY and P_FIRSTKEY. */ if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber) - elog(FATAL, "btree: failed to add leftkey to new root page"); + elog(STOP, "btree: failed to add leftkey to new root page"); pfree(new_item); /* @@ -1075,14 +1162,35 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) * insert the right page pointer into the new root page. */ if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, LP_USED) == InvalidOffsetNumber) - elog(FATAL, "btree: failed to add rightkey to new root page"); + elog(STOP, "btree: failed to add rightkey to new root page"); pfree(new_item); +#ifdef XLOG + /* XLOG stuff */ + { + xl_btree_newroot xlrec; + xlrec.node = rel->rd_node; + xlrec.rootblk = rootblknum; + + /* + * Dirrect access to page is not good but faster - we should + * implement some new func in page API. + */ + XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, + &xlrec, SizeOfBtreeNewroot, + (char*)rootpage + (PageHeader) rootpage)->pd_upper, + ((PageHeader) rootpage)->pd_special - ((PageHeader) rootpage)->upper); + + PageSetLSN(rootpage, recptr); + PageSetSUI(rootpage, ThisStartUpID); + } +#endif + /* write and let go of the new root buffer */ _bt_wrtbuf(rel, rootbuf); /* update metadata page with new root block number */ - _bt_metaproot(rel, rootbknum, 0); + _bt_metaproot(rel, rootblknum, 0); /* update and release new sibling, and finally the old root */ _bt_wrtbuf(rel, rbuf); @@ -1125,7 +1233,7 @@ _bt_pgaddtup(Relation rel, if (PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED) == InvalidOffsetNumber) - elog(FATAL, "btree: failed to add item to the %s for %s", + elog(STOP, "btree: failed to add item to the %s for %s", where, RelationGetRelationName(rel)); } diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 40604dbc258..2da74219010 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.37 2000/07/21 06:42:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.38 2000/10/04 00:04:42 vadim Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -128,7 +128,7 @@ _bt_getroot(Relation rel, int access) Page metapg; BTPageOpaque metaopaque; Buffer rootbuf; - Page rootpg; + Page rootpage; BTPageOpaque rootopaque; BlockNumber rootblkno; BTMetaPageData *metad; @@ -177,14 +177,31 @@ _bt_getroot(Relation rel, int access) */ rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootblkno = BufferGetBlockNumber(rootbuf); - rootpg = BufferGetPage(rootbuf); + rootpage = BufferGetPage(rootbuf); + + /* NO ELOG(ERROR) till meta is updated */ + + _bt_pageinit(rootpage, BufferGetPageSize(rootbuf)); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); + rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); + +#ifdef XLOG + /* XLOG stuff */ + { + xl_btree_insert xlrec; + xlrec.node = rel->rd_node; + + XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, + &xlrec, SizeOfBtreeNewroot, NULL, 0); + + PageSetLSN(rootpage, recptr); + PageSetSUI(rootpage, ThisStartUpID); + } +#endif metad->btm_root = rootblkno; metad->btm_level = 1; - _bt_pageinit(rootpg, BufferGetPageSize(rootbuf)); - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); - rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); _bt_wrtnorelbuf(rel, rootbuf); /* swap write lock for read lock */ @@ -218,8 +235,8 @@ _bt_getroot(Relation rel, int access) * at the metadata page and got the root buffer, then we got the wrong * buffer. Release it and try again. */ - rootpg = BufferGetPage(rootbuf); - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); + rootpage = BufferGetPage(rootbuf); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); if (! P_ISROOT(rootopaque)) { @@ -396,6 +413,20 @@ _bt_pagedel(Relation rel, ItemPointer tid) buf = _bt_getbuf(rel, blkno, BT_WRITE); page = BufferGetPage(buf); +#ifdef XLOG + /* XLOG stuff */ + { + xl_btree_delete xlrec; + xlrec.target.node = rel->rd_node; + xlrec.target.tid = *tid; + XLogRecPtr recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, + (char*) xlrec, SizeOfBtreeDelete, NULL, 0); + + PageSetLSN(page, recptr); + PageSetSUI(page, ThisStartUpID); + } +#endif + PageIndexTupleDelete(page, offno); /* write the buffer and release the lock */ |