diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2007-04-06 04:21:44 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2007-04-06 04:21:44 +0000 |
commit | 3e23b68dac006e8deb0afa327e855258df8de064 (patch) | |
tree | f5a555955dd954265dea1107e08dadd917714551 /src/backend/access | |
parent | d44163953c2ce74d6db9d9807e030a0a3b725da5 (diff) | |
download | postgresql-3e23b68dac006e8deb0afa327e855258df8de064.tar.gz postgresql-3e23b68dac006e8deb0afa327e855258df8de064.zip |
Support varlena fields with single-byte headers and unaligned storage.
This commit breaks any code that assumes that the mere act of forming a tuple
(without writing it to disk) does not "toast" any fields. While all available
regression tests pass, I'm not totally sure that we've fixed every nook and
cranny, especially in contrib.
Greg Stark with some help from Tom Lane
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/common/heaptuple.c | 447 |
-rw-r--r-- | src/backend/access/common/indextuple.c | 142 |
-rw-r--r-- | src/backend/access/heap/tuptoaster.c | 496 |
3 files changed, 752 insertions, 333 deletions
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 0c83262c3b8..f1a80d6feee 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -11,12 +11,53 @@ * we can get rid of it entirely. * * + * Some notes about varlenas and this code: + * + * Before Postgres 8.3 varlenas always had a 4-byte length header, and + * therefore always needed 4-byte alignment (at least). This wasted space + * for short varlenas, for example CHAR(1) took 5 bytes and could need up to + * 3 additional padding bytes for alignment. + * + * Now, a short varlena (up to 126 data bytes) is reduced to a 1-byte header + * and we don't align it. To hide this from datatype-specific functions that + * don't want to deal with it, such a datum is considered "toasted" and will + * be expanded back to the normal 4-byte-header format by pg_detoast_datum. + * (In performance-critical code paths we can use pg_detoast_datum_packed + * and the appropriate access macros to avoid that overhead.) Note that this + * conversion is performed directly in heap_form_tuple (or heap_formtuple), + * without explicitly invoking the toaster. + * + * This change will break any code that assumes it needn't detoast values + * that have been put into a tuple but never sent to disk. Hopefully there + * are few such places. + * + * Varlenas still have alignment 'i' (or 'd') in pg_type/pg_attribute, since + * that's the normal requirement for the untoasted format. But we ignore that + * for the 1-byte-header format. This means that the actual start position + * of a varlena datum may vary depending on which format it has. To determine + * what is stored, we have to require that alignment padding bytes be zero. + * (Postgres actually has always zeroed them, but now it's required!) 
Since + * the first byte of a 1-byte-header varlena can never be zero, we can examine + * the first byte after the previous datum to tell if it's a pad byte or the + * start of a 1-byte-header varlena. + * + * Note that while formerly we could rely on the first varlena column of a + * system catalog to be at the offset suggested by the C struct for the + * catalog, this is now risky: it's only safe if the preceding field is + * word-aligned, so that there will never be any padding. + * + * We don't pack varlenas whose attstorage is 'p', since the data type + * isn't expecting to have to detoast values. This is used in particular + * by oidvector and int2vector, which are used in the system catalogs + * and we'd like to still refer to them via C struct offsets. + * + * * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.116 2007/02/27 23:48:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.117 2007/04/06 04:21:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,11 +69,20 @@ #include "executor/tuptable.h" +/* Does att's datatype allow packing into the 1-byte-header varlena format? 
*/ +#define ATT_IS_PACKABLE(att) \ + ((att)->attlen == -1 && (att)->attstorage != 'p') +/* Use this if it's already known varlena */ +#define VARLENA_ATT_IS_PACKABLE(att) \ + ((att)->attstorage != 'p') + + /* ---------------------------------------------------------------- * misc support routines * ---------------------------------------------------------------- */ + /* * heap_compute_data_size * Determine size of the data area of a tuple to be constructed @@ -49,11 +99,29 @@ heap_compute_data_size(TupleDesc tupleDesc, for (i = 0; i < numberOfAttributes; i++) { + Datum val; + if (isnull[i]) continue; - data_length = att_align(data_length, att[i]->attalign); - data_length = att_addlength(data_length, att[i]->attlen, values[i]); + val = values[i]; + + if (ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(DatumGetPointer(val))) + { + /* + * we're anticipating converting to a short varlena header, + * so adjust length and don't count any alignment + */ + data_length += VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(val)); + } + else + { + data_length = att_align_datum(data_length, att[i]->attalign, + att[i]->attlen, val); + data_length = att_addlength_datum(data_length, att[i]->attlen, + val); + } } return data_length; @@ -79,11 +147,29 @@ ComputeDataSize(TupleDesc tupleDesc, for (i = 0; i < numberOfAttributes; i++) { + Datum val; + if (nulls[i] != ' ') continue; - data_length = att_align(data_length, att[i]->attalign); - data_length = att_addlength(data_length, att[i]->attlen, values[i]); + val = values[i]; + + if (ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(DatumGetPointer(val))) + { + /* + * we're anticipating converting to a short varlena header, + * so adjust length and don't count any alignment + */ + data_length += VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(val)); + } + else + { + data_length = att_align_datum(data_length, att[i]->attalign, + att[i]->attlen, val); + data_length = att_addlength_datum(data_length, att[i]->attlen, + val); + } } return 
data_length; @@ -95,17 +181,23 @@ ComputeDataSize(TupleDesc tupleDesc, * * We also fill the null bitmap (if any) and set the infomask bits * that reflect the tuple's data contents. + * + * NOTE: it is now REQUIRED that the caller have pre-zeroed the data area. */ void heap_fill_tuple(TupleDesc tupleDesc, Datum *values, bool *isnull, - char *data, uint16 *infomask, bits8 *bit) + char *data, Size data_size, + uint16 *infomask, bits8 *bit) { bits8 *bitP; int bitmask; int i; int numberOfAttributes = tupleDesc->natts; Form_pg_attribute *att = tupleDesc->attrs; +#ifdef USE_ASSERT_CHECKING + char *start = data; +#endif if (bit != NULL) { @@ -119,7 +211,7 @@ heap_fill_tuple(TupleDesc tupleDesc, bitmask = 0; } - *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTENDED); + *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTERNAL); for (i = 0; i < numberOfAttributes; i++) { @@ -145,36 +237,66 @@ heap_fill_tuple(TupleDesc tupleDesc, *bitP |= bitmask; } - /* XXX we are aligning the pointer itself, not the offset */ - data = (char *) att_align((long) data, att[i]->attalign); + /* + * XXX we use the att_align macros on the pointer value itself, + * not on an offset. This is a bit of a hack. 
+ */ if (att[i]->attbyval) { /* pass-by-value */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); store_att_byval(data, values[i], att[i]->attlen); data_length = att[i]->attlen; } else if (att[i]->attlen == -1) { /* varlena */ + Pointer val = DatumGetPointer(values[i]); + *infomask |= HEAP_HASVARWIDTH; - if (VARATT_IS_EXTERNAL(values[i])) + if (VARATT_IS_EXTERNAL(val)) + { *infomask |= HEAP_HASEXTERNAL; - if (VARATT_IS_COMPRESSED(values[i])) - *infomask |= HEAP_HASCOMPRESSED; - data_length = VARSIZE(DatumGetPointer(values[i])); - memcpy(data, DatumGetPointer(values[i]), data_length); + /* no alignment, since it's short by definition */ + data_length = VARSIZE_EXTERNAL(val); + memcpy(data, val, data_length); + } + else if (VARATT_IS_SHORT(val)) + { + /* no alignment for short varlenas */ + data_length = VARSIZE_SHORT(val); + memcpy(data, val, data_length); + } + else if (VARLENA_ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(val)) + { + /* convert to short varlena -- no alignment */ + data_length = VARATT_CONVERTED_SHORT_SIZE(val); + SET_VARSIZE_SHORT(data, data_length); + memcpy(data + 1, VARDATA(val), data_length - 1); + } + else + { + /* full 4-byte header varlena */ + data = (char *) att_align_nominal((long) data, + att[i]->attalign); + data_length = VARSIZE(val); + memcpy(data, val, data_length); + } } else if (att[i]->attlen == -2) { - /* cstring */ + /* cstring ... 
never needs alignment */ *infomask |= HEAP_HASVARWIDTH; + Assert(att[i]->attalign == 'c'); data_length = strlen(DatumGetCString(values[i])) + 1; memcpy(data, DatumGetPointer(values[i]), data_length); } else { /* fixed-length pass-by-reference */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); Assert(att[i]->attlen > 0); data_length = att[i]->attlen; memcpy(data, DatumGetPointer(values[i]), data_length); @@ -182,6 +304,8 @@ heap_fill_tuple(TupleDesc tupleDesc, data += data_length; } + + Assert((data - start) == data_size); } /* ---------------- @@ -193,18 +317,19 @@ heap_fill_tuple(TupleDesc tupleDesc, * ---------------- */ static void -DataFill(char *data, - TupleDesc tupleDesc, - Datum *values, - char *nulls, - uint16 *infomask, - bits8 *bit) +DataFill(TupleDesc tupleDesc, + Datum *values, char *nulls, + char *data, Size data_size, + uint16 *infomask, bits8 *bit) { bits8 *bitP; int bitmask; int i; int numberOfAttributes = tupleDesc->natts; Form_pg_attribute *att = tupleDesc->attrs; +#ifdef USE_ASSERT_CHECKING + char *start = data; +#endif if (bit != NULL) { @@ -218,7 +343,7 @@ DataFill(char *data, bitmask = 0; } - *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTENDED); + *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTERNAL); for (i = 0; i < numberOfAttributes; i++) { @@ -244,36 +369,66 @@ DataFill(char *data, *bitP |= bitmask; } - /* XXX we are aligning the pointer itself, not the offset */ - data = (char *) att_align((long) data, att[i]->attalign); + /* + * XXX we use the att_align macros on the pointer value itself, + * not on an offset. This is a bit of a hack. 
+ */ if (att[i]->attbyval) { /* pass-by-value */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); store_att_byval(data, values[i], att[i]->attlen); data_length = att[i]->attlen; } else if (att[i]->attlen == -1) { /* varlena */ + Pointer val = DatumGetPointer(values[i]); + *infomask |= HEAP_HASVARWIDTH; - if (VARATT_IS_EXTERNAL(values[i])) + if (VARATT_IS_EXTERNAL(val)) + { *infomask |= HEAP_HASEXTERNAL; - if (VARATT_IS_COMPRESSED(values[i])) - *infomask |= HEAP_HASCOMPRESSED; - data_length = VARSIZE(DatumGetPointer(values[i])); - memcpy(data, DatumGetPointer(values[i]), data_length); + /* no alignment, since it's short by definition */ + data_length = VARSIZE_EXTERNAL(val); + memcpy(data, val, data_length); + } + else if (VARATT_IS_SHORT(val)) + { + /* no alignment for short varlenas */ + data_length = VARSIZE_SHORT(val); + memcpy(data, val, data_length); + } + else if (VARLENA_ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(val)) + { + /* convert to short varlena -- no alignment */ + data_length = VARATT_CONVERTED_SHORT_SIZE(val); + SET_VARSIZE_SHORT(data, data_length); + memcpy(data + 1, VARDATA(val), data_length - 1); + } + else + { + /* full 4-byte header varlena */ + data = (char *) att_align_nominal((long) data, + att[i]->attalign); + data_length = VARSIZE(val); + memcpy(data, val, data_length); + } } else if (att[i]->attlen == -2) { - /* cstring */ + /* cstring ... 
never needs alignment */ *infomask |= HEAP_HASVARWIDTH; + Assert(att[i]->attalign == 'c'); data_length = strlen(DatumGetCString(values[i])) + 1; memcpy(data, DatumGetPointer(values[i]), data_length); } else { /* fixed-length pass-by-reference */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); Assert(att[i]->attlen > 0); data_length = att[i]->attlen; memcpy(data, DatumGetPointer(values[i]), data_length); @@ -281,6 +436,8 @@ DataFill(char *data, data += data_length; } + + Assert((data - start) == data_size); } /* ---------------------------------------------------------------- @@ -343,6 +500,8 @@ heap_attisnull(HeapTuple tup, int attnum) * the same attribute descriptor will go much quicker. -cim 5/4/91 * * NOTE: if you need to change this code, see also heap_deform_tuple. + * Also see nocache_index_getattr, which is the same code for index + * tuples. * ---------------- */ Datum @@ -353,20 +512,12 @@ nocachegetattr(HeapTuple tuple, { HeapTupleHeader tup = tuple->t_data; Form_pg_attribute *att = tupleDesc->attrs; - char *tp; /* ptr to att in tuple */ + char *tp; /* ptr to data part of tuple */ bits8 *bp = tup->t_bits; /* ptr to null bitmap in tuple */ - bool slow = false; /* do we have to walk nulls? */ + bool slow = false; /* do we have to walk attrs? 
*/ + int off; /* current offset within data */ (void) isnull; /* not used */ -#ifdef IN_MACRO -/* This is handled in the macro */ - Assert(attnum > 0); - - if (isnull) - *isnull = false; -#endif - - attnum--; /* ---------------- * Three cases: @@ -377,11 +528,21 @@ nocachegetattr(HeapTuple tuple, * ---------------- */ +#ifdef IN_MACRO +/* This is handled in the macro */ + Assert(attnum > 0); + + if (isnull) + *isnull = false; +#endif + + attnum--; + if (HeapTupleNoNulls(tuple)) { #ifdef IN_MACRO /* This is handled in the macro */ - if (att[attnum]->attcacheoff != -1) + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], (char *) tup + tup->t_hoff + @@ -436,24 +597,27 @@ nocachegetattr(HeapTuple tuple, tp = (char *) tup + tup->t_hoff; - /* - * now check for any non-fixed length attrs before our attribute - */ if (!slow) { - if (att[attnum]->attcacheoff != -1) + /* + * If we get here, there are no nulls up to and including the target + * attribute. If we have a cached offset, we can use it. + */ + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); } - else if (HeapTupleHasVarWidth(tuple)) + + /* + * Otherwise, check for non-fixed-length attrs up to and including + * target. If there aren't any, it's safe to cheaply initialize + * the cached offsets for these attrs. + */ + if (HeapTupleHasVarWidth(tuple)) { int j; - /* - * In for(), we test <= and not < because we want to see if we can - * go past it in initializing offsets. - */ for (j = 0; j <= attnum; j++) { if (att[j]->attlen <= 0) @@ -465,89 +629,109 @@ nocachegetattr(HeapTuple tuple, } } - /* - * If slow is false, and we got here, we know that we have a tuple with no - * nulls or var-widths before the target attribute. If possible, we also - * want to initialize the remainder of the attribute cached offset values. 
- */ if (!slow) { + int natts = tupleDesc->natts; int j = 1; - long off; - int natts = HeapTupleHeaderGetNatts(tup); /* - * need to set cache for some atts + * If we get here, we have a tuple with no nulls or var-widths up to + * and including the target attribute, so we can use the cached offset + * ... only we don't have it yet, or we'd not have got here. Since + * it's cheap to compute offsets for fixed-width columns, we take the + * opportunity to initialize the cached offsets for *all* the leading + * fixed-width columns, in hope of avoiding future visits to this + * routine. */ - att[0]->attcacheoff = 0; - while (j < attnum && att[j]->attcacheoff > 0) + /* we might have set some offsets in the slow path previously */ + while (j < natts && att[j]->attcacheoff > 0) j++; off = att[j - 1]->attcacheoff + att[j - 1]->attlen; - for (; j <= attnum || - /* Can we compute more? We will probably need them */ - (j < natts && - att[j]->attcacheoff == -1 && - (HeapTupleNoNulls(tuple) || !att_isnull(j, bp)) && - (HeapTupleAllFixed(tuple) || att[j]->attlen > 0)); j++) + for (; j < natts; j++) { - off = att_align(off, att[j]->attalign); + if (att[j]->attlen <= 0) + break; + + off = att_align_nominal(off, att[j]->attalign); att[j]->attcacheoff = off; - off = att_addlength(off, att[j]->attlen, tp + off); + off += att[j]->attlen; } - return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); + Assert(j > attnum); + + off = att[attnum]->attcacheoff; } else { bool usecache = true; - int off = 0; int i; /* - * Now we know that we have to walk the tuple CAREFULLY. + * Now we know that we have to walk the tuple CAREFULLY. But we + * still might be able to cache some offsets for next time. * * Note - This loop is a little tricky. For each non-null attribute, * we have to first account for alignment padding before the attr, * then advance over the attr based on its length. Nulls have no * storage and no alignment padding either. 
We can use/set - * attcacheoff until we pass either a null or a var-width attribute. + * attcacheoff until we reach either a null or a var-width attribute. */ - - for (i = 0; i < attnum; i++) + off = 0; + for (i = 0; ; i++) /* loop exit is at "break" */ { if (HeapTupleHasNulls(tuple) && att_isnull(i, bp)) { usecache = false; - continue; + continue; /* this cannot be the target att */ } - /* If we know the next offset, we can skip the alignment calc */ - if (usecache && att[i]->attcacheoff != -1) + /* If we know the next offset, we can skip the rest */ + if (usecache && att[i]->attcacheoff >= 0) off = att[i]->attcacheoff; + else if (att[i]->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. + */ + if (usecache && + off == att_align_nominal(off, att[i]->attalign)) + att[i]->attcacheoff = off; + else + { + off = att_align_pointer(off, att[i]->attalign, -1, + tp + off); + usecache = false; + } + } else { - off = att_align(off, att[i]->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, att[i]->attalign); if (usecache) att[i]->attcacheoff = off; } - off = att_addlength(off, att[i]->attlen, tp + off); + if (i == attnum) + break; + + off = att_addlength_pointer(off, att[i]->attlen, tp + off); if (usecache && att[i]->attlen <= 0) usecache = false; } - - off = att_align(off, att[attnum]->attalign); - - return fetchatt(att[attnum], tp + off); } + + return fetchatt(att[attnum], tp + off); } /* ---------------- @@ -671,7 +855,7 @@ heap_form_tuple(TupleDesc tupleDescriptor, { HeapTuple tuple; /* return tuple */ HeapTupleHeader td; /* tuple data */ - unsigned long len; + Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; @@ -723,7 +907,9 @@ heap_form_tuple(TupleDesc tupleDescriptor, 
hoff = len = MAXALIGN(len); /* align user data safely */ - len += heap_compute_data_size(tupleDescriptor, values, isnull); + data_len = heap_compute_data_size(tupleDescriptor, values, isnull); + + len += data_len; /* * Allocate and zero the space needed. Note that the tuple body and @@ -754,6 +940,7 @@ heap_form_tuple(TupleDesc tupleDescriptor, values, isnull, (char *) td + hoff, + data_len, &td->t_infomask, (hasnull ? td->t_bits : NULL)); @@ -778,7 +965,7 @@ heap_formtuple(TupleDesc tupleDescriptor, { HeapTuple tuple; /* return tuple */ HeapTupleHeader td; /* tuple data */ - unsigned long len; + Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; @@ -830,7 +1017,9 @@ heap_formtuple(TupleDesc tupleDescriptor, hoff = len = MAXALIGN(len); /* align user data safely */ - len += ComputeDataSize(tupleDescriptor, values, nulls); + data_len = ComputeDataSize(tupleDescriptor, values, nulls); + + len += data_len; /* * Allocate and zero the space needed. Note that the tuple body and @@ -857,16 +1046,18 @@ heap_formtuple(TupleDesc tupleDescriptor, if (tupleDescriptor->tdhasoid) /* else leave infomask = 0 */ td->t_infomask = HEAP_HASOID; - DataFill((char *) td + hoff, - tupleDescriptor, + DataFill(tupleDescriptor, values, nulls, + (char *) td + hoff, + data_len, &td->t_infomask, (hasnull ? td->t_bits : NULL)); return tuple; } + /* * heap_modify_tuple * form a new tuple from an old tuple and a set of replacement values. @@ -1069,9 +1260,28 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. 
+ */ + if (!slow && + off == att_align_nominal(off, thisatt->attalign)) + thisatt->attcacheoff = off; + else + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + } else { - off = att_align(off, thisatt->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; @@ -1079,7 +1289,7 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, values[attnum] = fetchatt(thisatt, tp + off); - off = att_addlength(off, thisatt->attlen, tp + off); + off = att_addlength_pointer(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) slow = true; /* can't use attcacheoff anymore */ @@ -1162,9 +1372,28 @@ heap_deformtuple(HeapTuple tuple, if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. 
+ */ + if (!slow && + off == att_align_nominal(off, thisatt->attalign)) + thisatt->attcacheoff = off; + else + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + } else { - off = att_align(off, thisatt->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; @@ -1172,7 +1401,7 @@ heap_deformtuple(HeapTuple tuple, values[attnum] = fetchatt(thisatt, tp + off); - off = att_addlength(off, thisatt->attlen, tp + off); + off = att_addlength_pointer(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) slow = true; /* can't use attcacheoff anymore */ @@ -1252,9 +1481,28 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. 
+ */ + if (!slow && + off == att_align_nominal(off, thisatt->attalign)) + thisatt->attcacheoff = off; + else + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + } else { - off = att_align(off, thisatt->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; @@ -1262,7 +1510,7 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) values[attnum] = fetchatt(thisatt, tp + off); - off = att_addlength(off, thisatt->attlen, tp + off); + off = att_addlength_pointer(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) slow = true; /* can't use attcacheoff anymore */ @@ -1543,7 +1791,7 @@ heap_form_minimal_tuple(TupleDesc tupleDescriptor, bool *isnull) { MinimalTuple tuple; /* return tuple */ - unsigned long len; + Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; @@ -1595,7 +1843,9 @@ heap_form_minimal_tuple(TupleDesc tupleDescriptor, hoff = len = MAXALIGN(len); /* align user data safely */ - len += heap_compute_data_size(tupleDescriptor, values, isnull); + data_len = heap_compute_data_size(tupleDescriptor, values, isnull); + + len += data_len; /* * Allocate and zero the space needed. @@ -1616,6 +1866,7 @@ heap_form_minimal_tuple(TupleDesc tupleDescriptor, values, isnull, (char *) tuple + hoff, + data_len, &tuple->t_infomask, (hasnull ? 
tuple->t_bits : NULL)); diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index c83e34834ca..471d28c28c4 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.81 2007/02/27 23:48:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.82 2007/04/06 04:21:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,7 @@ index_form_tuple(TupleDesc tupleDescriptor, char *tp; /* tuple pointer */ IndexTuple tuple; /* return tuple */ Size size, + data_size, hoff; int i; unsigned short infomask = 0; @@ -74,9 +75,9 @@ index_form_tuple(TupleDesc tupleDescriptor, */ if (VARATT_IS_EXTERNAL(values[i])) { - untoasted_values[i] = PointerGetDatum( - heap_tuple_fetch_attr( - (varattrib *) DatumGetPointer(values[i]))); + untoasted_values[i] = + PointerGetDatum(heap_tuple_fetch_attr((struct varlena *) + DatumGetPointer(values[i]))); untoasted_free[i] = true; } @@ -84,8 +85,8 @@ index_form_tuple(TupleDesc tupleDescriptor, * If value is above size target, and is of a compressible datatype, * try to compress it in-line. 
*/ - if (VARSIZE(untoasted_values[i]) > TOAST_INDEX_TARGET && - !VARATT_IS_EXTENDED(untoasted_values[i]) && + if (!VARATT_IS_EXTENDED(untoasted_values[i]) && + VARSIZE(untoasted_values[i]) > TOAST_INDEX_TARGET && (att->attstorage == 'x' || att->attstorage == 'm')) { Datum cvalue = toast_compress_datum(untoasted_values[i]); @@ -116,12 +117,13 @@ index_form_tuple(TupleDesc tupleDescriptor, hoff = IndexInfoFindDataOffset(infomask); #ifdef TOAST_INDEX_HACK - size = hoff + heap_compute_data_size(tupleDescriptor, - untoasted_values, isnull); + data_size = heap_compute_data_size(tupleDescriptor, + untoasted_values, isnull); #else - size = hoff + heap_compute_data_size(tupleDescriptor, - values, isnull); + data_size = heap_compute_data_size(tupleDescriptor, + values, isnull); #endif + size = hoff + data_size; size = MAXALIGN(size); /* be conservative */ tp = (char *) palloc0(size); @@ -135,6 +137,7 @@ index_form_tuple(TupleDesc tupleDescriptor, #endif isnull, (char *) tp + hoff, + data_size, &tupmask, (hasnull ? (bits8 *) tp + sizeof(IndexTupleData) : NULL)); @@ -201,17 +204,14 @@ nocache_index_getattr(IndexTuple tup, bool *isnull) { Form_pg_attribute *att = tupleDesc->attrs; - char *tp; /* ptr to att in tuple */ - bits8 *bp = NULL; /* ptr to null bitmask in tuple */ - bool slow = false; /* do we have to walk nulls? */ + char *tp; /* ptr to data part of tuple */ + bits8 *bp = NULL; /* ptr to null bitmap in tuple */ + bool slow = false; /* do we have to walk attrs? 
*/ int data_off; /* tuple data offset */ + int off; /* current offset within data */ (void) isnull; /* not used */ - /* - * sanity checks - */ - /* ---------------- * Three cases: * @@ -237,7 +237,7 @@ nocache_index_getattr(IndexTuple tup, { #ifdef IN_MACRO /* This is handled in the macro */ - if (att[attnum]->attcacheoff != -1) + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], (char *) tup + data_off + @@ -295,21 +295,28 @@ nocache_index_getattr(IndexTuple tup, tp = (char *) tup + data_off; - /* - * now check for any non-fixed length attrs before our attribute - */ if (!slow) { - if (att[attnum]->attcacheoff != -1) + /* + * If we get here, there are no nulls up to and including the target + * attribute. If we have a cached offset, we can use it. + */ + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); } - else if (IndexTupleHasVarwidths(tup)) + + /* + * Otherwise, check for non-fixed-length attrs up to and including + * target. If there aren't any, it's safe to cheaply initialize + * the cached offsets for these attrs. + */ + if (IndexTupleHasVarwidths(tup)) { int j; - for (j = 0; j < attnum; j++) + for (j = 0; j <= attnum; j++) { if (att[j]->attlen <= 0) { @@ -320,80 +327,109 @@ nocache_index_getattr(IndexTuple tup, } } - /* - * If slow is false, and we got here, we know that we have a tuple with no - * nulls or var-widths before the target attribute. If possible, we also - * want to initialize the remainder of the attribute cached offset values. - */ if (!slow) { + int natts = tupleDesc->natts; int j = 1; - long off; /* - * need to set cache for some atts + * If we get here, we have a tuple with no nulls or var-widths up to + * and including the target attribute, so we can use the cached offset + * ... only we don't have it yet, or we'd not have got here. 
Since + * it's cheap to compute offsets for fixed-width columns, we take the + * opportunity to initialize the cached offsets for *all* the leading + * fixed-width columns, in hope of avoiding future visits to this + * routine. */ - att[0]->attcacheoff = 0; - while (j < attnum && att[j]->attcacheoff > 0) + /* we might have set some offsets in the slow path previously */ + while (j < natts && att[j]->attcacheoff > 0) j++; off = att[j - 1]->attcacheoff + att[j - 1]->attlen; - for (; j <= attnum; j++) + for (; j < natts; j++) { - off = att_align(off, att[j]->attalign); + if (att[j]->attlen <= 0) + break; + + off = att_align_nominal(off, att[j]->attalign); att[j]->attcacheoff = off; off += att[j]->attlen; } - return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); + Assert(j > attnum); + + off = att[attnum]->attcacheoff; } else { bool usecache = true; - int off = 0; int i; /* - * Now we know that we have to walk the tuple CAREFULLY. + * Now we know that we have to walk the tuple CAREFULLY. But we + * still might be able to cache some offsets for next time. + * + * Note - This loop is a little tricky. For each non-null attribute, + * we have to first account for alignment padding before the attr, + * then advance over the attr based on its length. Nulls have no + * storage and no alignment padding either. We can use/set + * attcacheoff until we reach either a null or a var-width attribute. 
*/ - - for (i = 0; i < attnum; i++) + off = 0; + for (i = 0; ; i++) /* loop exit is at "break" */ { - if (IndexTupleHasNulls(tup)) + if (IndexTupleHasNulls(tup) && att_isnull(i, bp)) { - if (att_isnull(i, bp)) - { - usecache = false; - continue; - } + usecache = false; + continue; /* this cannot be the target att */ } /* If we know the next offset, we can skip the rest */ - if (usecache && att[i]->attcacheoff != -1) + if (usecache && att[i]->attcacheoff >= 0) off = att[i]->attcacheoff; + else if (att[i]->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. + */ + if (usecache && + off == att_align_nominal(off, att[i]->attalign)) + att[i]->attcacheoff = off; + else + { + off = att_align_pointer(off, att[i]->attalign, -1, + tp + off); + usecache = false; + } + } else { - off = att_align(off, att[i]->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, att[i]->attalign); if (usecache) att[i]->attcacheoff = off; } - off = att_addlength(off, att[i]->attlen, tp + off); + if (i == attnum) + break; + + off = att_addlength_pointer(off, att[i]->attlen, tp + off); if (usecache && att[i]->attlen <= 0) usecache = false; } - - off = att_align(off, att[attnum]->attalign); - - return fetchatt(att[attnum], tp + off); } + + return fetchatt(att[attnum], tp + off); } /* diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 334d6700423..1a3c01bcac9 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.73 2007/04/03 04:14:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.74 2007/04/06 04:21:41 tgl Exp $ * * * INTERFACE ROUTINES @@ -42,25 
+42,39 @@ #undef TOAST_DEBUG +/* + * Testing whether an externally-stored value is compressed now requires + * comparing extsize (the actual length of the external data) to rawsize + * (the original uncompressed datum's size). The latter includes VARHDRSZ + * overhead, the former doesn't. We never use compression unless it actually + * saves space, so we expect either equality or less-than. + */ +#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ + ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ) + static void toast_delete_datum(Relation rel, Datum value); static Datum toast_save_datum(Relation rel, Datum value, bool use_wal, bool use_fsm); -static varattrib *toast_fetch_datum(varattrib *attr); -static varattrib *toast_fetch_datum_slice(varattrib *attr, +static struct varlena *toast_fetch_datum(struct varlena *attr); +static struct varlena *toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length); /* ---------- * heap_tuple_fetch_attr - * - * Public entry point to get back a toasted value + * Public entry point to get back a toasted value from * external storage (possibly still in compressed format). - * ---------- + * + * This will return a datum that contains all the data internally, ie, not + * relying on external storage, but it can still be compressed or have a short + * header. + ---------- */ -varattrib * -heap_tuple_fetch_attr(varattrib *attr) +struct varlena * +heap_tuple_fetch_attr(struct varlena *attr) { - varattrib *result; + struct varlena *result; if (VARATT_IS_EXTERNAL(attr)) { @@ -88,35 +102,25 @@ heap_tuple_fetch_attr(varattrib *attr) * or external storage. 
* ---------- */ -varattrib * -heap_tuple_untoast_attr(varattrib *attr) +struct varlena * +heap_tuple_untoast_attr(struct varlena *attr) { - varattrib *result; - if (VARATT_IS_EXTERNAL(attr)) { + /* + * This is an externally stored datum --- fetch it back from there + */ + attr = toast_fetch_datum(attr); + /* If it's compressed, decompress it */ if (VARATT_IS_COMPRESSED(attr)) { - /* ---------- - * This is an external stored compressed value - * Fetch it from the toast heap and decompress. - * ---------- - */ - PGLZ_Header *tmp; + PGLZ_Header *tmp = (PGLZ_Header *) attr; - tmp = (PGLZ_Header *) toast_fetch_datum(attr); - result = (varattrib *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(result, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - pglz_decompress(tmp, VARDATA(result)); + attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + pglz_decompress(tmp, VARDATA(attr)); pfree(tmp); } - else - { - /* - * This is an external stored plain value - */ - result = toast_fetch_datum(attr); - } } else if (VARATT_IS_COMPRESSED(attr)) { @@ -125,18 +129,26 @@ heap_tuple_untoast_attr(varattrib *attr) */ PGLZ_Header *tmp = (PGLZ_Header *) attr; - result = (varattrib *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(result, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - pglz_decompress(tmp, VARDATA(result)); + attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + pglz_decompress(tmp, VARDATA(attr)); } - else - + else if (VARATT_IS_SHORT(attr)) + { /* - * This is a plain value inside of the main tuple - why am I called? 
+ * This is a short-header varlena --- convert to 4-byte header format */ - return attr; + Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; + Size new_size = data_size + VARHDRSZ; + struct varlena *new_attr; + + new_attr = (struct varlena *) palloc(new_size); + SET_VARSIZE(new_attr, new_size); + memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); + attr = new_attr; + } - return result; + return attr; } @@ -147,44 +159,57 @@ heap_tuple_untoast_attr(varattrib *attr) * from compression or external storage. * ---------- */ -varattrib * -heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelength) +struct varlena * +heap_tuple_untoast_attr_slice(struct varlena *attr, + int32 sliceoffset, int32 slicelength) { - varattrib *preslice; - varattrib *result; + struct varlena *preslice; + struct varlena *result; + char *attrdata; int32 attrsize; - if (VARATT_IS_COMPRESSED(attr)) + if (VARATT_IS_EXTERNAL(attr)) { - PGLZ_Header *tmp; + struct varatt_external toast_pointer; - if (VARATT_IS_EXTERNAL(attr)) - tmp = (PGLZ_Header *) toast_fetch_datum(attr); - else - tmp = (PGLZ_Header *) attr; /* compressed in main tuple */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), sizeof(toast_pointer)); + + /* fast path for non-compressed external datums */ + if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - preslice = (varattrib *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(preslice, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + /* fetch it back (compressed marker will get set automatically) */ + preslice = toast_fetch_datum(attr); + } + else + preslice = attr; + + if (VARATT_IS_COMPRESSED(preslice)) + { + PGLZ_Header *tmp = (PGLZ_Header *) preslice; + Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ; + + preslice = (struct varlena *) palloc(size); + SET_VARSIZE(preslice, size); pglz_decompress(tmp, VARDATA(preslice)); if (tmp != (PGLZ_Header *) attr) pfree(tmp); } + + if (VARATT_IS_SHORT(preslice)) 
+ { + attrdata = VARDATA_SHORT(preslice); + attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT; + } else { - /* Plain value */ - if (VARATT_IS_EXTERNAL(attr)) - { - /* fast path */ - return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - } - else - preslice = attr; + attrdata = VARDATA(preslice); + attrsize = VARSIZE(preslice) - VARHDRSZ; } /* slicing of datum for compressed cases and plain value */ - attrsize = VARSIZE(preslice) - VARHDRSZ; if (sliceoffset >= attrsize) { sliceoffset = 0; @@ -194,10 +219,10 @@ heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelen if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) slicelength = attrsize - sliceoffset; - result = (varattrib *) palloc(slicelength + VARHDRSZ); + result = (struct varlena *) palloc(slicelength + VARHDRSZ); SET_VARSIZE(result, slicelength + VARHDRSZ); - memcpy(VARDATA(result), VARDATA(preslice) + sliceoffset, slicelength); + memcpy(VARDATA(result), attrdata + sliceoffset, slicelength); if (preslice != attr) pfree(preslice); @@ -210,29 +235,35 @@ heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelen * toast_raw_datum_size - * * Return the raw (detoasted) size of a varlena datum + * (including the VARHDRSZ header) * ---------- */ Size toast_raw_datum_size(Datum value) { - varattrib *attr = (varattrib *) DatumGetPointer(value); + struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_COMPRESSED(attr)) + if (VARATT_IS_EXTERNAL(attr)) { - /* - * va_rawsize shows the original data size, whether the datum is - * external or not. 
- */ - result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ; + /* va_rawsize is the size of the original datum -- including header */ + struct varatt_external toast_pointer; + + memcpy(&toast_pointer, VARDATA_SHORT(attr), sizeof(toast_pointer)); + result = toast_pointer.va_rawsize; } - else if (VARATT_IS_EXTERNAL(attr)) + else if (VARATT_IS_COMPRESSED(attr)) + { + /* here, va_rawsize is just the payload size */ + result = VARRAWSIZE_4B_C(attr) + VARHDRSZ; + } + else if (VARATT_IS_SHORT(attr)) { /* - * an uncompressed external attribute has rawsize including the header - * (not too consistent!) + * we have to normalize the header length to VARHDRSZ or else the + * callers of this function will be confused. */ - result = attr->va_content.va_external.va_rawsize; + result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ; } else { @@ -251,7 +282,7 @@ toast_raw_datum_size(Datum value) Size toast_datum_size(Datum value) { - varattrib *attr = (varattrib *) DatumGetPointer(value); + struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) @@ -261,7 +292,14 @@ toast_datum_size(Datum value) * compressed or not. We do not count the size of the toast pointer * ... should we? 
*/ - result = attr->va_content.va_external.va_extsize; + struct varatt_external toast_pointer; + + memcpy(&toast_pointer, VARDATA_SHORT(attr), sizeof(toast_pointer)); + result = toast_pointer.va_extsize; + } + else if (VARATT_IS_SHORT(attr)) + { + result = VARSIZE_SHORT(attr); } else { @@ -413,16 +451,16 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, for (i = 0; i < numAttrs; i++) { - varattrib *old_value; - varattrib *new_value; + struct varlena *old_value; + struct varlena *new_value; if (oldtup != NULL) { /* * For UPDATE get the old and new values of this attribute */ - old_value = (varattrib *) DatumGetPointer(toast_oldvalues[i]); - new_value = (varattrib *) DatumGetPointer(toast_values[i]); + old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]); + new_value = (struct varlena *) DatumGetPointer(toast_values[i]); /* * If the old value is an external stored one, check if it has @@ -432,10 +470,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, VARATT_IS_EXTERNAL(old_value)) { if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) || - old_value->va_content.va_external.va_valueid != - new_value->va_content.va_external.va_valueid || - old_value->va_content.va_external.va_toastrelid != - new_value->va_content.va_external.va_toastrelid) + memcmp(VARDATA_SHORT(old_value), + VARDATA_SHORT(new_value), + sizeof(struct varatt_external)) != 0) { /* * The old external stored value isn't needed any more @@ -452,7 +489,6 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, * tuple. 
*/ toast_action[i] = 'p'; - toast_sizes[i] = VARSIZE(toast_values[i]); continue; } } @@ -462,7 +498,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, /* * For INSERT simply get the new value */ - new_value = (varattrib *) DatumGetPointer(toast_values[i]); + new_value = (struct varlena *) DatumGetPointer(toast_values[i]); } /* @@ -503,7 +539,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, /* * Remember the size of this attribute */ - toast_sizes[i] = VARSIZE(new_value); + toast_sizes[i] = VARSIZE_ANY(new_value); } else { @@ -542,7 +578,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, toast_values, toast_isnull) > maxDataLen) { int biggest_attno = -1; - int32 biggest_size = MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; Datum new_value; @@ -553,7 +589,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, { if (toast_action[i] != ' ') continue; - if (VARATT_IS_EXTENDED(toast_values[i])) + if (VARATT_IS_EXTERNAL(toast_values[i])) + continue; + if (VARATT_IS_COMPRESSED(toast_values[i])) continue; if (att[i]->attstorage != 'x') continue; @@ -603,7 +641,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; - int32 biggest_size = MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; /*------ @@ -639,9 +677,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, use_wal, use_fsm); if (toast_free[i]) pfree(DatumGetPointer(old_value)); - toast_free[i] = true; - toast_sizes[i] = VARSIZE(toast_values[i]); need_change = true; need_free = true; @@ -655,7 +691,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, toast_values, toast_isnull) > maxDataLen) { int biggest_attno = -1; - int32 biggest_size = 
MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; Datum new_value; @@ -666,7 +702,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, { if (toast_action[i] != ' ') continue; - if (VARATT_IS_EXTENDED(toast_values[i])) + if (VARATT_IS_EXTERNAL(toast_values[i])) + continue; + if (VARATT_IS_COMPRESSED(toast_values[i])) continue; if (att[i]->attstorage != 'm') continue; @@ -715,7 +753,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; - int32 biggest_size = MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; /*-------- @@ -768,6 +806,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, HeapTupleHeader olddata = newtup->t_data; HeapTupleHeader new_data; int32 new_len; + int32 new_data_len; /* * Calculate the new size of the tuple. Header size should not @@ -780,8 +819,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, new_len += sizeof(Oid); new_len = MAXALIGN(new_len); Assert(new_len == olddata->t_hoff); - new_len += heap_compute_data_size(tupleDesc, - toast_values, toast_isnull); + new_data_len = heap_compute_data_size(tupleDesc, + toast_values, toast_isnull); + new_len += new_data_len; /* * Allocate and zero the space needed, and fill HeapTupleData fields. @@ -802,6 +842,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, toast_values, toast_isnull, (char *) new_data + olddata->t_hoff, + new_data_len, &(new_data->t_infomask), has_nulls ? new_data->t_bits : NULL); } @@ -835,6 +876,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, * This must be invoked on any potentially-composite field that is to be * inserted into a tuple. Doing this preserves the invariant that toasting * goes only one level deep in a tuple. 
+ * + * Note that flattening does not mean expansion of short-header varlenas, + * so in one sense toasting is allowed within composite datums. * ---------- */ Datum @@ -845,6 +889,7 @@ toast_flatten_tuple_attribute(Datum value, HeapTupleHeader olddata; HeapTupleHeader new_data; int32 new_len; + int32 new_data_len; HeapTupleData tmptup; Form_pg_attribute *att; int numAttrs; @@ -891,10 +936,11 @@ toast_flatten_tuple_attribute(Datum value, has_nulls = true; else if (att[i]->attlen == -1) { - varattrib *new_value; + struct varlena *new_value; - new_value = (varattrib *) DatumGetPointer(toast_values[i]); - if (VARATT_IS_EXTENDED(new_value)) + new_value = (struct varlena *) DatumGetPointer(toast_values[i]); + if (VARATT_IS_EXTERNAL(new_value) || + VARATT_IS_COMPRESSED(new_value)) { new_value = heap_tuple_untoast_attr(new_value); toast_values[i] = PointerGetDatum(new_value); @@ -924,7 +970,9 @@ toast_flatten_tuple_attribute(Datum value, new_len += sizeof(Oid); new_len = MAXALIGN(new_len); Assert(new_len == olddata->t_hoff); - new_len += heap_compute_data_size(tupleDesc, toast_values, toast_isnull); + new_data_len = heap_compute_data_size(tupleDesc, + toast_values, toast_isnull); + new_len += new_data_len; new_data = (HeapTupleHeader) palloc0(new_len); @@ -939,6 +987,7 @@ toast_flatten_tuple_attribute(Datum value, toast_values, toast_isnull, (char *) new_data + olddata->t_hoff, + new_data_len, &(new_data->t_infomask), has_nulls ? new_data->t_bits : NULL); @@ -962,21 +1011,26 @@ toast_flatten_tuple_attribute(Datum value, * If we fail (ie, compressed result is actually bigger than original) * then return NULL. We must not use compressed data if it'd expand * the tuple! + * + * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without + * copying them. But we can't handle external or compressed datums. 
* ---------- */ Datum toast_compress_datum(Datum value) { - varattrib *tmp; - int32 valsize = VARSIZE(value) - VARHDRSZ; + struct varlena *tmp; + int32 valsize = VARSIZE_ANY_EXHDR(value); - tmp = (varattrib *) palloc(PGLZ_MAX_OUTPUT(valsize)); - if (pglz_compress(VARDATA(value), valsize, + Assert(!VARATT_IS_EXTERNAL(value)); + Assert(!VARATT_IS_COMPRESSED(value)); + + tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize)); + if (pglz_compress(VARDATA_ANY(value), valsize, (PGLZ_Header *) tmp, PGLZ_strategy_default) && - VARSIZE(tmp) < VARSIZE(value)) + VARSIZE(tmp) < VARSIZE_ANY(value)) { /* successful compression */ - VARATT_SIZEP_DEPRECATED(tmp) |= VARATT_FLAG_COMPRESSED; return PointerGetDatum(tmp); } else @@ -992,7 +1046,7 @@ toast_compress_datum(Datum value) * toast_save_datum - * * Save one single datum into the secondary relation and return - * a varattrib reference for it. + * a Datum reference for it. * ---------- */ static Datum @@ -1006,7 +1060,8 @@ toast_save_datum(Relation rel, Datum value, Datum t_values[3]; bool t_isnull[3]; CommandId mycid = GetCurrentCommandId(); - varattrib *result; + struct varlena *result; + struct varatt_external toast_pointer; struct { struct varlena hdr; @@ -1027,44 +1082,51 @@ toast_save_datum(Relation rel, Datum value, toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock); /* - * Create the varattrib reference + * Get the data pointer and length, and compute va_rawsize and va_extsize. + * + * va_rawsize is the size of the equivalent fully uncompressed datum, + * so we have to adjust for short headers. + * + * va_extsize is the actual size of the data payload in the toast records. 
*/ - result = (varattrib *) palloc(sizeof(varattrib)); - - SET_VARSIZE(result, sizeof(varattrib)); - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_EXTERNAL; - if (VARATT_IS_COMPRESSED(value)) + if (VARATT_IS_SHORT(value)) + { + data_p = VARDATA_SHORT(value); + data_todo = VARSIZE_SHORT(value) - VARHDRSZ_SHORT; + toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */ + toast_pointer.va_extsize = data_todo; + } + else if (VARATT_IS_COMPRESSED(value)) { - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_COMPRESSED; - result->va_content.va_external.va_rawsize = - ((varattrib *) value)->va_content.va_compressed.va_rawsize; + data_p = VARDATA(value); + data_todo = VARSIZE(value) - VARHDRSZ; + /* rawsize in a compressed datum is just the size of the payload */ + toast_pointer.va_rawsize = VARRAWSIZE_4B_C(value) + VARHDRSZ; + toast_pointer.va_extsize = data_todo; + /* Assert that the numbers look like it's compressed */ + Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); } else - result->va_content.va_external.va_rawsize = VARSIZE(value); + { + data_p = VARDATA(value); + data_todo = VARSIZE(value) - VARHDRSZ; + toast_pointer.va_rawsize = VARSIZE(value); + toast_pointer.va_extsize = data_todo; + } - result->va_content.va_external.va_extsize = - VARSIZE(value) - VARHDRSZ; - result->va_content.va_external.va_valueid = - GetNewOidWithIndex(toastrel, toastidx); - result->va_content.va_external.va_toastrelid = - rel->rd_rel->reltoastrelid; + toast_pointer.va_valueid = GetNewOidWithIndex(toastrel, toastidx); + toast_pointer.va_toastrelid = rel->rd_rel->reltoastrelid; /* * Initialize constant parts of the tuple data */ - t_values[0] = ObjectIdGetDatum(result->va_content.va_external.va_valueid); + t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid); t_values[2] = PointerGetDatum(&chunk_data); t_isnull[0] = false; t_isnull[1] = false; t_isnull[2] = false; /* - * Get the data to process - */ - data_p = VARDATA(value); - data_todo = VARSIZE(value) - VARHDRSZ; - 
- /* * Split up the item into chunks */ while (data_todo > 0) @@ -1111,11 +1173,18 @@ toast_save_datum(Relation rel, Datum value, } /* - * Done - close toast relation and return the reference + * Done - close toast relation */ index_close(toastidx, RowExclusiveLock); heap_close(toastrel, RowExclusiveLock); + /* + * Create the TOAST pointer value that we'll return + */ + result = (struct varlena *) palloc(sizeof(varattrib_pointer)); + SET_VARSIZE_EXTERNAL(result); + memcpy(VARDATA_SHORT(result), &toast_pointer, sizeof(toast_pointer)); + return PointerGetDatum(result); } @@ -1129,7 +1198,8 @@ toast_save_datum(Relation rel, Datum value, static void toast_delete_datum(Relation rel, Datum value) { - varattrib *attr = (varattrib *) DatumGetPointer(value); + struct varlena *attr = (struct varlena *) DatumGetPointer(value); + struct varatt_external toast_pointer; Relation toastrel; Relation toastidx; ScanKeyData toastkey; @@ -1139,11 +1209,14 @@ toast_delete_datum(Relation rel, Datum value) if (!VARATT_IS_EXTERNAL(attr)) return; + /* Must copy to access aligned fields */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), + sizeof(struct varatt_external)); + /* * Open the toast relation and its index */ - toastrel = heap_open(attr->va_content.va_external.va_toastrelid, - RowExclusiveLock); + toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock); toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock); /* @@ -1153,7 +1226,7 @@ toast_delete_datum(Relation rel, Datum value) ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Find the chunks by index @@ -1180,12 +1253,12 @@ toast_delete_datum(Relation rel, Datum value) /* ---------- * toast_fetch_datum - * - * Reconstruct an in memory varattrib from the chunks saved + * Reconstruct an in memory Datum from the chunks saved * in the toast relation * ---------- */ -static 
varattrib * -toast_fetch_datum(varattrib *attr) +static struct varlena * +toast_fetch_datum(struct varlena *attr) { Relation toastrel; Relation toastidx; @@ -1193,28 +1266,35 @@ toast_fetch_datum(varattrib *attr) IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; - varattrib *result; + struct varlena *result; + struct varatt_external toast_pointer; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; + char *chunkdata; int32 chunksize; - ressize = attr->va_content.va_external.va_extsize; + /* Must copy to access aligned fields */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), + sizeof(struct varatt_external)); + + ressize = toast_pointer.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; - result = (varattrib *) palloc(ressize + VARHDRSZ); - SET_VARSIZE(result, ressize + VARHDRSZ); - if (VARATT_IS_COMPRESSED(attr)) - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_COMPRESSED; + result = (struct varlena *) palloc(ressize + VARHDRSZ); + + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ); + else + SET_VARSIZE(result, ressize + VARHDRSZ); /* * Open the toast relation and its index */ - toastrel = heap_open(attr->va_content.va_external.va_toastrelid, - AccessShareLock); + toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); @@ -1224,7 +1304,7 @@ toast_fetch_datum(varattrib *attr) ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Read the chunks by index @@ -1246,7 +1326,24 @@ toast_fetch_datum(varattrib *attr) Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); - chunksize = VARSIZE(chunk) - VARHDRSZ; + if (!VARATT_IS_EXTENDED(chunk)) + { + chunksize = 
VARSIZE(chunk) - VARHDRSZ; + chunkdata = VARDATA(chunk); + } + else if (VARATT_IS_SHORT(chunk)) + { + /* could happen due to heap_form_tuple doing its thing */ + chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; + chunkdata = VARDATA_SHORT(chunk); + } + else + { + /* should never happen */ + elog(ERROR, "found toasted toast chunk"); + chunksize = 0; /* keep compiler quiet */ + chunkdata = NULL; + } /* * Some checks on the data we've found @@ -1254,31 +1351,35 @@ toast_fetch_datum(varattrib *attr) if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u", + chunksize, (int) TOAST_MAX_CHUNK_SIZE, + residx, numchunks, + toast_pointer.va_valueid); } - else if (residx < numchunks) + else if (residx == numchunks-1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u", + chunksize, + (int) (ressize - residx*TOAST_MAX_CHUNK_SIZE), + residx, + toast_pointer.va_valueid); } else - elog(ERROR, "unexpected chunk number %d for toast value %u", + elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)", residx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid, + 0, numchunks-1); /* * Copy the data into proper place in our result */ memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE, - VARDATA(chunk), + chunkdata, chunksize); nextidx++; @@ -1290,7 +1391,7 @@ 
toast_fetch_datum(varattrib *attr) if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); /* * End scan and close relations @@ -1305,12 +1406,12 @@ toast_fetch_datum(varattrib *attr) /* ---------- * toast_fetch_datum_slice - * - * Reconstruct a segment of a varattrib from the chunks saved + * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * ---------- */ -static varattrib * -toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) +static struct varlena * +toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) { Relation toastrel; Relation toastidx; @@ -1319,7 +1420,8 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; - varattrib *result; + struct varlena *result; + struct varatt_external toast_pointer; int32 attrsize; int32 residx; int32 nextidx; @@ -1331,11 +1433,16 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) int totalchunks; Pointer chunk; bool isnull; + char *chunkdata; int32 chunksize; int32 chcpystrt; int32 chcpyend; - attrsize = attr->va_content.va_external.va_extsize; + /* Must copy to access aligned fields */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), + sizeof(struct varatt_external)); + + attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; if (sliceoffset >= attrsize) @@ -1347,11 +1454,12 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; - result = (varattrib *) palloc(length + VARHDRSZ); - SET_VARSIZE(result, length + VARHDRSZ); + result = (struct varlena *) palloc(length + VARHDRSZ); - if (VARATT_IS_COMPRESSED(attr)) - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_COMPRESSED; + if 
(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); + else + SET_VARSIZE(result, length + VARHDRSZ); if (length == 0) return result; /* Can save a lot of work at this point! */ @@ -1366,8 +1474,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) /* * Open the toast relation and its index */ - toastrel = heap_open(attr->va_content.va_external.va_toastrelid, - AccessShareLock); + toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); @@ -1378,7 +1485,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) ScanKeyInit(&toastkey[0], (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Use equality condition for one chunk, a range condition otherwise: @@ -1421,7 +1528,24 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); - chunksize = VARSIZE(chunk) - VARHDRSZ; + if (!VARATT_IS_EXTENDED(chunk)) + { + chunksize = VARSIZE(chunk) - VARHDRSZ; + chunkdata = VARDATA(chunk); + } + else if (VARATT_IS_SHORT(chunk)) + { + /* could happen due to heap_form_tuple doing its thing */ + chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; + chunkdata = VARDATA_SHORT(chunk); + } + else + { + /* should never happen */ + elog(ERROR, "found toasted toast chunk"); + chunksize = 0; /* keep compiler quiet */ + chunkdata = NULL; + } /* * Some checks on the data we've found @@ -1429,21 +1553,29 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk)) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, - 
attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); if (residx < totalchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u when fetching slice", + chunksize, (int) TOAST_MAX_CHUNK_SIZE, + residx, totalchunks, + toast_pointer.va_valueid); } - else + else if (residx == totalchunks-1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u when fetching slice", + chunksize, + (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE), + residx, + toast_pointer.va_valueid); } + else + elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)", + residx, + toast_pointer.va_valueid, + 0, totalchunks-1); /* * Copy the data into proper place in our result @@ -1457,7 +1589,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) memcpy(VARDATA(result) + (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, - VARDATA(chunk) + chcpystrt, + chunkdata + chcpystrt, (chcpyend - chcpystrt) + 1); nextidx++; @@ -1469,7 +1601,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) if (nextidx != (endchunk + 1)) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); /* * End scan and close relations |