/* src/backend/access/heap/hio.c */

/*-------------------------------------------------------------------------
 *
 * hio.c--
 *	  POSTGRES heap access method input/output code.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Id: hio.c,v 1.13 1998/01/07 21:01:23 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include <postgres.h>

#include <storage/bufpage.h>
#include <access/heapam.h>
#include <storage/bufmgr.h>
#include <utils/memutils.h>

/*
 * RelationPutHeapTuple - store a tuple on a given block of a relation
 *
 *	 relation	- relation that receives the tuple
 *	 blockIndex - block on which to place the tuple; asserted to be less
 *				  than the relation's current number of blocks
 *	 tuple		- tuple to store; on return its t_ctid holds the
 *				  (block, offset) position at which it was stored
 *
 *	 The tuple is appended to the page's item array with PageAddItem, the
 *	 t_ctid of the item stored on the page is set to that item's own
 *	 position, and the buffer is then passed to WriteBuffer.
 *
 *	 The caller is expected to have picked a page with enough free space;
 *	 that is only verified by an Assert here.  Because the page could have
 *	 gained tuples between the time the caller chose it and this call, the
 *	 result of PageAddItem is checked as well.
 *
 *	 Errors are reported with elog(ERROR):
 *	   - no valid buffer could be obtained for blockIndex
 *	   - PageAddItem could not place the tuple on the page
 */
void
RelationPutHeapTuple(Relation relation,
					 BlockNumber blockIndex,
					 HeapTuple tuple)
{
	Buffer		buffer;
	Page		pageHeader;
	BlockNumber numberOfBlocks;
	OffsetNumber offnum;
	unsigned int len;
	ItemId		itemId;
	Item		item;

	/* ----------------
	 *	increment access statistics
	 * ----------------
	 */
	IncrHeapAccessStat(local_RelationPutHeapTuple);
	IncrHeapAccessStat(global_RelationPutHeapTuple);

	Assert(RelationIsValid(relation));
	Assert(HeapTupleIsValid(tuple));

	numberOfBlocks = RelationGetNumberOfBlocks(relation);
	Assert(blockIndex < numberOfBlocks);

	buffer = ReadBuffer(relation, blockIndex);
#ifndef NO_BUFFERISVALID
	if (!BufferIsValid(buffer))
	{
		/*
		 * Cast the block number so that the argument type matches the
		 * conversion specifier no matter how BlockNumber is defined.
		 */
		elog(ERROR, "RelationPutHeapTuple: no buffer for %lu in %s",
			 (unsigned long) blockIndex, &relation->rd_rel->relname);
	}
#endif

	pageHeader = (Page) BufferGetPage(buffer);
	len = (unsigned) DOUBLEALIGN(tuple->t_len); /* be conservative */
	Assert((int) len <= PageGetFreeSpace(pageHeader));

	offnum = PageAddItem(pageHeader, (Item) tuple,
						 tuple->t_len, InvalidOffsetNumber, LP_USED);

	/*
	 * PageAddItem signals failure by returning InvalidOffsetNumber.  Using
	 * that value to look up an item id would touch memory outside the
	 * page's line pointer array, so stop here instead.
	 */
	if (offnum == InvalidOffsetNumber)
		elog(ERROR, "RelationPutHeapTuple: failed to add tuple to page");

	/* make the stored item point at its own location */
	itemId = PageGetItemId(pageHeader, offnum);
	item = PageGetItem(pageHeader, itemId);

	ItemPointerSet(&((HeapTuple) item)->t_ctid, blockIndex, offnum);

	WriteBuffer(buffer);
	/* return an accurate tuple */
	ItemPointerSet(&tuple->t_ctid, blockIndex, offnum);
}

/*
 * RelationPutHeapTupleAtEnd - append a tuple to the last page of a relation
 *
 *	 relation	- relation that receives the tuple
 *	 tuple		- tuple to store; on return its t_ctid holds the
 *				  (block, offset) position at which it was stored
 *
 * The tuple goes onto the relation's last block if it fits there;
 * otherwise a new block is requested (P_NEW), initialized with PageInit,
 * and used instead.  elog(ERROR) is raised if the tuple does not fit even
 * on a freshly initialized page, or if PageAddItem fails to place it.
 *
 * This routine is another in the series of attempts to reduce the number
 * of I/O's and system calls executed in the various benchmarks.  In
 * particular, this routine is used to append data to the end of a relation
 * file without excessive lseeks.  This code should do no more than 2 semops
 * in the ideal case.
 *
 * Eventually, we should cache the number of blocks in a relation somewhere.
 * Until that time, this code will have to do an lseek to determine the number
 * of blocks in a relation.
 *
 * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
 * to do an append; it's possible to eliminate 2 of the semops if we do direct
 * buffer stuff (!); the lseek and the write can go if we get
 * RelationGetNumberOfBlocks to be useful.
 *
 * NOTE: This code presumes that we have a write lock on the relation.
 *
 * Also note that this routine probably shouldn't have to exist, and does
 * screw up the call graph rather badly, but we are wasting so much time and
 * system resources being massively general that we are losing badly in our
 * performance benchmarks.
 */
void
RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
{
	Buffer		buffer;
	Page		pageHeader;
	BlockNumber lastblock;
	OffsetNumber offnum;
	unsigned int len;
	ItemId		itemId;
	Item		item;

	Assert(RelationIsValid(relation));
	Assert(HeapTupleIsValid(tuple));

	/*
	 * XXX This does an lseek - VERY expensive - but at the moment it is
	 * the only way to accurately determine how many blocks are in a
	 * relation.  A good optimization would be to get this to actually
	 * work properly.
	 */

	lastblock = RelationGetNumberOfBlocks(relation);

	if (lastblock == 0)
	{
		/*
		 * Empty relation: look at block 0 and, if the page there has
		 * never been initialized, swap it for a freshly allocated block
		 * and set that one up.
		 */
		buffer = ReadBuffer(relation, lastblock);
		pageHeader = (Page) BufferGetPage(buffer);
		if (PageIsNew((PageHeader) pageHeader))
		{
			buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
			pageHeader = (Page) BufferGetPage(buffer);
			PageInit(pageHeader, BufferGetPageSize(buffer), 0);
		}
	}
	else
		buffer = ReadBuffer(relation, lastblock - 1);

	pageHeader = (Page) BufferGetPage(buffer);
	len = (unsigned) DOUBLEALIGN(tuple->t_len); /* be conservative */

	/*
	 * Note that this is true if the above returned a bogus page, which it
	 * will do for a completely empty relation.
	 */

	if (len > PageGetFreeSpace(pageHeader))
	{
		/* not enough room on the last page: extend the relation */
		buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
		pageHeader = (Page) BufferGetPage(buffer);
		PageInit(pageHeader, BufferGetPageSize(buffer), 0);

		/* len is unsigned, so it must be printed with %u */
		if (len > PageGetFreeSpace(pageHeader))
			elog(ERROR, "Tuple is too big: size %u", len);
	}

	offnum = PageAddItem(pageHeader, (Item) tuple,
						 tuple->t_len, InvalidOffsetNumber, LP_USED);

	/*
	 * PageAddItem signals failure by returning InvalidOffsetNumber.  Using
	 * that value to look up an item id would touch memory outside the
	 * page's line pointer array, so stop here instead.
	 */
	if (offnum == InvalidOffsetNumber)
		elog(ERROR, "RelationPutHeapTupleAtEnd: failed to add tuple to page");

	itemId = PageGetItemId(pageHeader, offnum);
	item = PageGetItem(pageHeader, itemId);

	/* the block actually used may be a newly allocated one */
	lastblock = BufferGetBlockNumber(buffer);

	/* make the stored item point at its own location */
	ItemPointerSet(&((HeapTuple) item)->t_ctid, lastblock, offnum);

	/* return an accurate tuple */
	ItemPointerSet(&tuple->t_ctid, lastblock, offnum);

	WriteBuffer(buffer);
}