1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
|
/*-------------------------------------------------------------------------
*
* nbtree.h
* header file for postgres btree access method implementation.
*
*
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.38 2000/06/15 03:32:31 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef NBTREE_H
#define NBTREE_H
#include "access/itup.h"
#include "access/relscan.h"
#include "access/sdir.h"
/*
* BTPageOpaqueData -- At the end of every page, we store a pointer
* to both siblings in the tree. See Lehman and Yao's paper for more
* info. In addition, we need to know what sort of page this is
* (leaf or internal), and whether the page is available for reuse.
*
* Lehman and Yao's algorithm requires a ``high key'' on every page.
* The high key on a page is guaranteed to be greater than or equal
* to any key that appears on this page. Our insertion algorithm
* guarantees that we can use the initial least key on our right
* sibling as the high key. We allocate space for the line pointer
* to the high key in the opaque data at the end of the page.
*
* Rightmost pages in the tree have no high key.
*/
typedef struct BTPageOpaqueData
{
BlockNumber btpo_prev;
BlockNumber btpo_next;
BlockNumber btpo_parent;
uint16 btpo_flags;
#define BTP_LEAF (1 << 0)
#define BTP_ROOT (1 << 1)
#define BTP_FREE (1 << 2)
#define BTP_META (1 << 3)
#define BTP_CHAIN (1 << 4)
} BTPageOpaqueData;
typedef BTPageOpaqueData *BTPageOpaque;
/*
* ScanOpaqueData is used to remember which buffers we're currently
* examining in the scan. We keep these buffers locked and pinned
* and recorded in the opaque entry of the scan in order to avoid
* doing a ReadBuffer() for every tuple in the index. This avoids
* semop() calls, which are expensive.
*
* And it's used to remember actual scankey info (we need in it
* if some scankeys evaled at runtime).
*
* curHeapIptr & mrkHeapIptr are heap iptr-s from current/marked
* index tuples: we don't adjust scans on insertions (and, if LLL
* is ON, don't hold locks on index pages between passes) - we
* use these pointers to restore index scan positions...
* - vadim 07/29/98
*/
typedef struct BTScanOpaqueData
{
Buffer btso_curbuf;
Buffer btso_mrkbuf;
ItemPointerData curHeapIptr;
ItemPointerData mrkHeapIptr;
uint16 qual_ok; /* 0 for quals like key == 1 && key > 2 */
uint16 numberOfKeys; /* number of keys */
uint16 numberOfFirstKeys; /* number of keys for 1st
* attribute */
ScanKey keyData; /* key descriptor */
} BTScanOpaqueData;
typedef BTScanOpaqueData *BTScanOpaque;
/*
* BTItems are what we store in the btree. Each item has an index
* tuple, including key and pointer values. In addition, we must
* guarantee that all tuples in the index are unique, in order to
* satisfy some assumptions in Lehman and Yao. The way that we do
* this is by generating a new OID for every insertion that we do in
* the tree. This adds eight bytes to the size of btree index
* tuples. Note that we do not use the OID as part of a composite
* key; the OID only serves as a unique identifier for a given index
* tuple (logical position within a page).
*
* New comments:
* actually, we must guarantee that all tuples in A LEVEL
* are unique, not in ALL INDEX. So, we can use bti_itup->t_tid
* as unique identifier for a given index tuple (logical position
* within a level). - vadim 04/09/97
*/
typedef struct BTItemData
{
IndexTupleData bti_itup;
} BTItemData;
typedef BTItemData *BTItem;
#define BTItemSame(i1, i2) ( i1->bti_itup.t_tid.ip_blkid.bi_hi == \
i2->bti_itup.t_tid.ip_blkid.bi_hi && \
i1->bti_itup.t_tid.ip_blkid.bi_lo == \
i2->bti_itup.t_tid.ip_blkid.bi_lo && \
i1->bti_itup.t_tid.ip_posid == \
i2->bti_itup.t_tid.ip_posid )
/*
* BTStackData -- As we descend a tree, we push the (key, pointer)
* pairs from internal nodes onto a private stack. If we split a
* leaf, we use this stack to walk back up the tree and insert data
* into parent nodes (and possibly to split them, too). Lehman and
* Yao's update algorithm guarantees that under no circumstances can
* our private stack give us an irredeemably bad picture up the tree.
* Again, see the paper for details.
*/
typedef struct BTStackData
{
BlockNumber bts_blkno;
OffsetNumber bts_offset;
BTItem bts_btitem;
struct BTStackData *bts_parent;
} BTStackData;
typedef BTStackData *BTStack;
typedef struct BTPageState
{
Buffer btps_buf;
Page btps_page;
BTItem btps_lastbti;
OffsetNumber btps_lastoff;
OffsetNumber btps_firstoff;
int btps_level;
bool btps_doupper;
struct BTPageState *btps_next;
} BTPageState;
/*
* We need to be able to tell the difference between read and write
* requests for pages, in order to do locking correctly.
*/
#define BT_READ BUFFER_LOCK_SHARE
#define BT_WRITE BUFFER_LOCK_EXCLUSIVE
/*
* Similarly, the difference between insertion and non-insertion binary
* searches on a given page makes a difference when we're descending the
* tree.
*/
#define BT_INSERTION 0
#define BT_DESCENT 1
/*
* In general, the btree code tries to localize its knowledge about
* page layout to a couple of routines. However, we need a special
* value to indicate "no page number" in those places where we expect
* page numbers.
*/
#define P_NONE 0
#define P_LEFTMOST(opaque) ((opaque)->btpo_prev == P_NONE)
#define P_RIGHTMOST(opaque) ((opaque)->btpo_next == P_NONE)
#define P_HIKEY ((OffsetNumber) 1)
#define P_FIRSTKEY ((OffsetNumber) 2)
/*
* Strategy numbers -- ordering of these is <, <=, =, >=, >
*/
#define BTLessStrategyNumber 1
#define BTLessEqualStrategyNumber 2
#define BTEqualStrategyNumber 3
#define BTGreaterEqualStrategyNumber 4
#define BTGreaterStrategyNumber 5
#define BTMaxStrategyNumber 5
/*
* When a new operator class is declared, we require that the user
* supply us with an amproc procedure for determining whether, for
* two keys a and b, a < b, a = b, or a > b. This routine must
* return < 0, 0, > 0, respectively, in these three cases. Since we
* only have one such proc in amproc, it's number 1.
*/
#define BTORDER_PROC 1
/*
* prototypes for functions in nbtinsert.c
*/
extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem,
bool index_is_unique, Relation heapRel);
extern bool _bt_itemcmp(Relation rel, Size keysz, ScanKey scankey,
BTItem item1, BTItem item2, StrategyNumber strat);
/*
* prototypes for functions in nbtpage.c
*/
extern void _bt_metapinit(Relation rel);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _bt_relbuf(Relation rel, Buffer buf, int access);
extern void _bt_wrtbuf(Relation rel, Buffer buf);
extern void _bt_wrtnorelbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern void _bt_metaproot(Relation rel, BlockNumber rootbknum, int level);
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
extern void _bt_pagedel(Relation rel, ItemPointer tid);
/*
* prototypes for functions in nbtree.c
*/
extern bool BuildingBtree; /* in nbtree.c */
extern Datum btbuild(PG_FUNCTION_ARGS);
extern Datum btinsert(PG_FUNCTION_ARGS);
extern Datum btgettuple(PG_FUNCTION_ARGS);
extern Datum btbeginscan(PG_FUNCTION_ARGS);
extern Datum btrescan(PG_FUNCTION_ARGS);
extern void btmovescan(IndexScanDesc scan, Datum v);
extern Datum btendscan(PG_FUNCTION_ARGS);
extern Datum btmarkpos(PG_FUNCTION_ARGS);
extern Datum btrestrpos(PG_FUNCTION_ARGS);
extern Datum btdelete(PG_FUNCTION_ARGS);
/*
* prototypes for functions in nbtscan.c
*/
extern void _bt_regscan(IndexScanDesc scan);
extern void _bt_dropscan(IndexScanDesc scan);
extern void _bt_adjscans(Relation rel, ItemPointer tid);
extern void AtEOXact_nbtree(void);
/*
* prototypes for functions in nbtsearch.c
*/
extern BTStack _bt_search(Relation rel, int keysz, ScanKey scankey,
Buffer *bufP);
extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
ScanKey scankey, int access);
extern bool _bt_skeycmp(Relation rel, Size keysz, ScanKey scankey,
Page page, ItemId itemid, StrategyNumber strat);
extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
ScanKey scankey, int srchtype);
extern RetrieveIndexResult _bt_next(IndexScanDesc scan, ScanDirection dir);
extern RetrieveIndexResult _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
/*
* prototypes for functions in nbtstrat.c
*/
extern StrategyNumber _bt_getstrat(Relation rel, AttrNumber attno,
RegProcedure proc);
/*
* prototypes for functions in nbtutils.c
*/
extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
extern ScanKey _bt_mkscankey_nodata(Relation rel);
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
extern void _bt_orderkeys(Relation relation, BTScanOpaque so);
extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok);
extern BTItem _bt_formitem(IndexTuple itup);
/*
* prototypes for functions in nbtsort.c
*/
typedef struct BTSpool BTSpool; /* opaque type known only within nbtsort.c */
extern BTSpool *_bt_spoolinit(Relation index, bool isunique);
extern void _bt_spooldestroy(BTSpool *btspool);
extern void _bt_spool(BTItem btitem, BTSpool *btspool);
extern void _bt_leafbuild(BTSpool *btspool);
#endif /* NBTREE_H */
|