diff options
Diffstat (limited to 'src/backend/access')
-rw-r--r-- | src/backend/access/common/heaptuple.c | 447 | ||||
-rw-r--r-- | src/backend/access/common/indextuple.c | 142 | ||||
-rw-r--r-- | src/backend/access/heap/tuptoaster.c | 496 |
3 files changed, 752 insertions, 333 deletions
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 0c83262c3b8..f1a80d6feee 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -11,12 +11,53 @@ * we can get rid of it entirely. * * + * Some notes about varlenas and this code: + * + * Before Postgres 8.3 varlenas always had a 4-byte length header, and + * therefore always needed 4-byte alignment (at least). This wasted space + * for short varlenas, for example CHAR(1) took 5 bytes and could need up to + * 3 additional padding bytes for alignment. + * + * Now, a short varlena (up to 126 data bytes) is reduced to a 1-byte header + * and we don't align it. To hide this from datatype-specific functions that + * don't want to deal with it, such a datum is considered "toasted" and will + * be expanded back to the normal 4-byte-header format by pg_detoast_datum. + * (In performance-critical code paths we can use pg_detoast_datum_packed + * and the appropriate access macros to avoid that overhead.) Note that this + * conversion is performed directly in heap_form_tuple (or heap_formtuple), + * without explicitly invoking the toaster. + * + * This change will break any code that assumes it needn't detoast values + * that have been put into a tuple but never sent to disk. Hopefully there + * are few such places. + * + * Varlenas still have alignment 'i' (or 'd') in pg_type/pg_attribute, since + * that's the normal requirement for the untoasted format. But we ignore that + * for the 1-byte-header format. This means that the actual start position + * of a varlena datum may vary depending on which format it has. To determine + * what is stored, we have to require that alignment padding bytes be zero. + * (Postgres actually has always zeroed them, but now it's required!) Since + * the first byte of a 1-byte-header varlena can never be zero, we can examine + * the first byte after the previous datum to tell if it's a pad byte or the + * start of a 1-byte-header varlena. + * + * Note that while formerly we could rely on the first varlena column of a + * system catalog to be at the offset suggested by the C struct for the + * catalog, this is now risky: it's only safe if the preceding field is + * word-aligned, so that there will never be any padding. + * + * We don't pack varlenas whose attstorage is 'p', since the data type + * isn't expecting to have to detoast values. This is used in particular + * by oidvector and int2vector, which are used in the system catalogs + * and we'd like to still refer to them via C struct offsets. + * + * * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.116 2007/02/27 23:48:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/heaptuple.c,v 1.117 2007/04/06 04:21:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,11 +69,20 @@ #include "executor/tuptable.h" +/* Does att's datatype allow packing into the 1-byte-header varlena format? */ +#define ATT_IS_PACKABLE(att) \ + ((att)->attlen == -1 && (att)->attstorage != 'p') +/* Use this if it's already known varlena */ +#define VARLENA_ATT_IS_PACKABLE(att) \ + ((att)->attstorage != 'p') + + /* ---------------------------------------------------------------- * misc support routines * ---------------------------------------------------------------- */ + /* * heap_compute_data_size * Determine size of the data area of a tuple to be constructed @@ -49,11 +99,29 @@ heap_compute_data_size(TupleDesc tupleDesc, for (i = 0; i < numberOfAttributes; i++) { + Datum val; + if (isnull[i]) continue; - data_length = att_align(data_length, att[i]->attalign); - data_length = att_addlength(data_length, att[i]->attlen, values[i]); + val = values[i]; + + if (ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(DatumGetPointer(val))) + { + /* + * we're anticipating converting to a short varlena header, + * so adjust length and don't count any alignment + */ + data_length += VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(val)); + } + else + { + data_length = att_align_datum(data_length, att[i]->attalign, + att[i]->attlen, val); + data_length = att_addlength_datum(data_length, att[i]->attlen, + val); + } } return data_length; @@ -79,11 +147,29 @@ ComputeDataSize(TupleDesc tupleDesc, for (i = 0; i < numberOfAttributes; i++) { + Datum val; + if (nulls[i] != ' ') continue; - data_length = att_align(data_length, att[i]->attalign); - data_length = att_addlength(data_length, att[i]->attlen, values[i]); + val = values[i]; + + if (ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(DatumGetPointer(val))) + { + /* + * we're anticipating converting to a short varlena header, + * so adjust length and don't count any alignment + */ + data_length += VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(val)); + } + else + { + data_length = att_align_datum(data_length, att[i]->attalign, + att[i]->attlen, val); + data_length = att_addlength_datum(data_length, att[i]->attlen, + val); + } } return data_length; @@ -95,17 +181,23 @@ ComputeDataSize(TupleDesc tupleDesc, * * We also fill the null bitmap (if any) and set the infomask bits * that reflect the tuple's data contents. + * + * NOTE: it is now REQUIRED that the caller have pre-zeroed the data area. */ void heap_fill_tuple(TupleDesc tupleDesc, Datum *values, bool *isnull, - char *data, uint16 *infomask, bits8 *bit) + char *data, Size data_size, + uint16 *infomask, bits8 *bit) { bits8 *bitP; int bitmask; int i; int numberOfAttributes = tupleDesc->natts; Form_pg_attribute *att = tupleDesc->attrs; +#ifdef USE_ASSERT_CHECKING + char *start = data; +#endif if (bit != NULL) { @@ -119,7 +211,7 @@ heap_fill_tuple(TupleDesc tupleDesc, bitmask = 0; } - *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTENDED); + *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTERNAL); for (i = 0; i < numberOfAttributes; i++) { @@ -145,36 +237,66 @@ heap_fill_tuple(TupleDesc tupleDesc, *bitP |= bitmask; } - /* XXX we are aligning the pointer itself, not the offset */ - data = (char *) att_align((long) data, att[i]->attalign); + /* + * XXX we use the att_align macros on the pointer value itself, + * not on an offset. This is a bit of a hack. + */ if (att[i]->attbyval) { /* pass-by-value */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); store_att_byval(data, values[i], att[i]->attlen); data_length = att[i]->attlen; } else if (att[i]->attlen == -1) { /* varlena */ + Pointer val = DatumGetPointer(values[i]); + *infomask |= HEAP_HASVARWIDTH; - if (VARATT_IS_EXTERNAL(values[i])) + if (VARATT_IS_EXTERNAL(val)) + { *infomask |= HEAP_HASEXTERNAL; - if (VARATT_IS_COMPRESSED(values[i])) - *infomask |= HEAP_HASCOMPRESSED; - data_length = VARSIZE(DatumGetPointer(values[i])); - memcpy(data, DatumGetPointer(values[i]), data_length); + /* no alignment, since it's short by definition */ + data_length = VARSIZE_EXTERNAL(val); + memcpy(data, val, data_length); + } + else if (VARATT_IS_SHORT(val)) + { + /* no alignment for short varlenas */ + data_length = VARSIZE_SHORT(val); + memcpy(data, val, data_length); + } + else if (VARLENA_ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(val)) + { + /* convert to short varlena -- no alignment */ + data_length = VARATT_CONVERTED_SHORT_SIZE(val); + SET_VARSIZE_SHORT(data, data_length); + memcpy(data + 1, VARDATA(val), data_length - 1); + } + else + { + /* full 4-byte header varlena */ + data = (char *) att_align_nominal((long) data, + att[i]->attalign); + data_length = VARSIZE(val); + memcpy(data, val, data_length); + } } else if (att[i]->attlen == -2) { - /* cstring */ + /* cstring ... never needs alignment */ *infomask |= HEAP_HASVARWIDTH; + Assert(att[i]->attalign == 'c'); data_length = strlen(DatumGetCString(values[i])) + 1; memcpy(data, DatumGetPointer(values[i]), data_length); } else { /* fixed-length pass-by-reference */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); Assert(att[i]->attlen > 0); data_length = att[i]->attlen; memcpy(data, DatumGetPointer(values[i]), data_length); @@ -182,6 +304,8 @@ heap_fill_tuple(TupleDesc tupleDesc, data += data_length; } + + Assert((data - start) == data_size); } /* ---------------- @@ -193,18 +317,19 @@ heap_fill_tuple(TupleDesc tupleDesc, * ---------------- */ static void -DataFill(char *data, - TupleDesc tupleDesc, - Datum *values, - char *nulls, - uint16 *infomask, - bits8 *bit) +DataFill(TupleDesc tupleDesc, + Datum *values, char *nulls, + char *data, Size data_size, + uint16 *infomask, bits8 *bit) { bits8 *bitP; int bitmask; int i; int numberOfAttributes = tupleDesc->natts; Form_pg_attribute *att = tupleDesc->attrs; +#ifdef USE_ASSERT_CHECKING + char *start = data; +#endif if (bit != NULL) { @@ -218,7 +343,7 @@ DataFill(char *data, bitmask = 0; } - *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTENDED); + *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTERNAL); for (i = 0; i < numberOfAttributes; i++) { @@ -244,36 +369,66 @@ DataFill(char *data, *bitP |= bitmask; } - /* XXX we are aligning the pointer itself, not the offset */ - data = (char *) att_align((long) data, att[i]->attalign); + /* + * XXX we use the att_align macros on the pointer value itself, + * not on an offset. This is a bit of a hack. + */ if (att[i]->attbyval) { /* pass-by-value */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); store_att_byval(data, values[i], att[i]->attlen); data_length = att[i]->attlen; } else if (att[i]->attlen == -1) { /* varlena */ + Pointer val = DatumGetPointer(values[i]); + *infomask |= HEAP_HASVARWIDTH; - if (VARATT_IS_EXTERNAL(values[i])) + if (VARATT_IS_EXTERNAL(val)) + { *infomask |= HEAP_HASEXTERNAL; - if (VARATT_IS_COMPRESSED(values[i])) - *infomask |= HEAP_HASCOMPRESSED; - data_length = VARSIZE(DatumGetPointer(values[i])); - memcpy(data, DatumGetPointer(values[i]), data_length); + /* no alignment, since it's short by definition */ + data_length = VARSIZE_EXTERNAL(val); + memcpy(data, val, data_length); + } + else if (VARATT_IS_SHORT(val)) + { + /* no alignment for short varlenas */ + data_length = VARSIZE_SHORT(val); + memcpy(data, val, data_length); + } + else if (VARLENA_ATT_IS_PACKABLE(att[i]) && + VARATT_CAN_MAKE_SHORT(val)) + { + /* convert to short varlena -- no alignment */ + data_length = VARATT_CONVERTED_SHORT_SIZE(val); + SET_VARSIZE_SHORT(data, data_length); + memcpy(data + 1, VARDATA(val), data_length - 1); + } + else + { + /* full 4-byte header varlena */ + data = (char *) att_align_nominal((long) data, + att[i]->attalign); + data_length = VARSIZE(val); + memcpy(data, val, data_length); + } } else if (att[i]->attlen == -2) { - /* cstring */ + /* cstring ... never needs alignment */ *infomask |= HEAP_HASVARWIDTH; + Assert(att[i]->attalign == 'c'); data_length = strlen(DatumGetCString(values[i])) + 1; memcpy(data, DatumGetPointer(values[i]), data_length); } else { /* fixed-length pass-by-reference */ + data = (char *) att_align_nominal((long) data, att[i]->attalign); Assert(att[i]->attlen > 0); data_length = att[i]->attlen; memcpy(data, DatumGetPointer(values[i]), data_length); @@ -281,6 +436,8 @@ DataFill(char *data, data += data_length; } + + Assert((data - start) == data_size); } /* ---------------------------------------------------------------- @@ -343,6 +500,8 @@ heap_attisnull(HeapTuple tup, int attnum) * the same attribute descriptor will go much quicker. -cim 5/4/91 * * NOTE: if you need to change this code, see also heap_deform_tuple. + * Also see nocache_index_getattr, which is the same code for index + * tuples. * ---------------- */ Datum @@ -353,20 +512,12 @@ nocachegetattr(HeapTuple tuple, { HeapTupleHeader tup = tuple->t_data; Form_pg_attribute *att = tupleDesc->attrs; - char *tp; /* ptr to att in tuple */ + char *tp; /* ptr to data part of tuple */ bits8 *bp = tup->t_bits; /* ptr to null bitmap in tuple */ - bool slow = false; /* do we have to walk nulls? */ + bool slow = false; /* do we have to walk attrs? */ + int off; /* current offset within data */ (void) isnull; /* not used */ -#ifdef IN_MACRO -/* This is handled in the macro */ - Assert(attnum > 0); - - if (isnull) - *isnull = false; -#endif - - attnum--; /* ---------------- * Three cases: @@ -377,11 +528,21 @@ nocachegetattr(HeapTuple tuple, * ---------------- */ +#ifdef IN_MACRO +/* This is handled in the macro */ + Assert(attnum > 0); + + if (isnull) + *isnull = false; +#endif + + attnum--; + if (HeapTupleNoNulls(tuple)) { #ifdef IN_MACRO /* This is handled in the macro */ - if (att[attnum]->attcacheoff != -1) + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], (char *) tup + tup->t_hoff + @@ -436,24 +597,27 @@ nocachegetattr(HeapTuple tuple, tp = (char *) tup + tup->t_hoff; - /* - * now check for any non-fixed length attrs before our attribute - */ if (!slow) { - if (att[attnum]->attcacheoff != -1) + /* + * If we get here, there are no nulls up to and including the target + * attribute. If we have a cached offset, we can use it. + */ + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); } - else if (HeapTupleHasVarWidth(tuple)) + + /* + * Otherwise, check for non-fixed-length attrs up to and including + * target. If there aren't any, it's safe to cheaply initialize + * the cached offsets for these attrs. + */ + if (HeapTupleHasVarWidth(tuple)) { int j; - /* - * In for(), we test <= and not < because we want to see if we can - * go past it in initializing offsets. - */ for (j = 0; j <= attnum; j++) { if (att[j]->attlen <= 0) @@ -465,89 +629,109 @@ nocachegetattr(HeapTuple tuple, } } - /* - * If slow is false, and we got here, we know that we have a tuple with no - * nulls or var-widths before the target attribute. If possible, we also - * want to initialize the remainder of the attribute cached offset values. - */ if (!slow) { + int natts = tupleDesc->natts; int j = 1; - long off; - int natts = HeapTupleHeaderGetNatts(tup); /* - * need to set cache for some atts + * If we get here, we have a tuple with no nulls or var-widths up to + * and including the target attribute, so we can use the cached offset + * ... only we don't have it yet, or we'd not have got here. Since + * it's cheap to compute offsets for fixed-width columns, we take the + * opportunity to initialize the cached offsets for *all* the leading + * fixed-width columns, in hope of avoiding future visits to this + * routine. */ - att[0]->attcacheoff = 0; - while (j < attnum && att[j]->attcacheoff > 0) + /* we might have set some offsets in the slow path previously */ + while (j < natts && att[j]->attcacheoff > 0) j++; off = att[j - 1]->attcacheoff + att[j - 1]->attlen; - for (; j <= attnum || - /* Can we compute more? We will probably need them */ - (j < natts && - att[j]->attcacheoff == -1 && - (HeapTupleNoNulls(tuple) || !att_isnull(j, bp)) && - (HeapTupleAllFixed(tuple) || att[j]->attlen > 0)); j++) + for (; j < natts; j++) { - off = att_align(off, att[j]->attalign); + if (att[j]->attlen <= 0) + break; + + off = att_align_nominal(off, att[j]->attalign); att[j]->attcacheoff = off; - off = att_addlength(off, att[j]->attlen, tp + off); + off += att[j]->attlen; } - return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); + Assert(j > attnum); + + off = att[attnum]->attcacheoff; } else { bool usecache = true; - int off = 0; int i; /* - * Now we know that we have to walk the tuple CAREFULLY. + * Now we know that we have to walk the tuple CAREFULLY. But we + * still might be able to cache some offsets for next time. * * Note - This loop is a little tricky. For each non-null attribute, * we have to first account for alignment padding before the attr, * then advance over the attr based on its length. Nulls have no * storage and no alignment padding either. We can use/set - * attcacheoff until we pass either a null or a var-width attribute. + * attcacheoff until we reach either a null or a var-width attribute. */ - - for (i = 0; i < attnum; i++) + off = 0; + for (i = 0; ; i++) /* loop exit is at "break" */ { if (HeapTupleHasNulls(tuple) && att_isnull(i, bp)) { usecache = false; - continue; + continue; /* this cannot be the target att */ } - /* If we know the next offset, we can skip the alignment calc */ - if (usecache && att[i]->attcacheoff != -1) + /* If we know the next offset, we can skip the rest */ + if (usecache && att[i]->attcacheoff >= 0) off = att[i]->attcacheoff; + else if (att[i]->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. + */ + if (usecache && + off == att_align_nominal(off, att[i]->attalign)) + att[i]->attcacheoff = off; + else + { + off = att_align_pointer(off, att[i]->attalign, -1, + tp + off); + usecache = false; + } + } else { - off = att_align(off, att[i]->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, att[i]->attalign); if (usecache) att[i]->attcacheoff = off; } - off = att_addlength(off, att[i]->attlen, tp + off); + if (i == attnum) + break; + + off = att_addlength_pointer(off, att[i]->attlen, tp + off); if (usecache && att[i]->attlen <= 0) usecache = false; } - - off = att_align(off, att[attnum]->attalign); - - return fetchatt(att[attnum], tp + off); } + + return fetchatt(att[attnum], tp + off); } /* ---------------- @@ -671,7 +855,7 @@ heap_form_tuple(TupleDesc tupleDescriptor, { HeapTuple tuple; /* return tuple */ HeapTupleHeader td; /* tuple data */ - unsigned long len; + Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; @@ -723,7 +907,9 @@ heap_form_tuple(TupleDesc tupleDescriptor, hoff = len = MAXALIGN(len); /* align user data safely */ - len += heap_compute_data_size(tupleDescriptor, values, isnull); + data_len = heap_compute_data_size(tupleDescriptor, values, isnull); + + len += data_len; /* * Allocate and zero the space needed. Note that the tuple body and @@ -754,6 +940,7 @@ heap_form_tuple(TupleDesc tupleDescriptor, values, isnull, (char *) td + hoff, + data_len, &td->t_infomask, (hasnull ? td->t_bits : NULL)); @@ -778,7 +965,7 @@ heap_formtuple(TupleDesc tupleDescriptor, { HeapTuple tuple; /* return tuple */ HeapTupleHeader td; /* tuple data */ - unsigned long len; + Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; @@ -830,7 +1017,9 @@ heap_formtuple(TupleDesc tupleDescriptor, hoff = len = MAXALIGN(len); /* align user data safely */ - len += ComputeDataSize(tupleDescriptor, values, nulls); + data_len = ComputeDataSize(tupleDescriptor, values, nulls); + + len += data_len; /* * Allocate and zero the space needed. Note that the tuple body and @@ -857,16 +1046,18 @@ heap_formtuple(TupleDesc tupleDescriptor, if (tupleDescriptor->tdhasoid) /* else leave infomask = 0 */ td->t_infomask = HEAP_HASOID; - DataFill((char *) td + hoff, - tupleDescriptor, + DataFill(tupleDescriptor, values, nulls, + (char *) td + hoff, + data_len, &td->t_infomask, (hasnull ? td->t_bits : NULL)); return tuple; } + /* * heap_modify_tuple * form a new tuple from an old tuple and a set of replacement values. @@ -1069,9 +1260,28 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. + */ + if (!slow && + off == att_align_nominal(off, thisatt->attalign)) + thisatt->attcacheoff = off; + else + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + } else { - off = att_align(off, thisatt->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; @@ -1079,7 +1289,7 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, values[attnum] = fetchatt(thisatt, tp + off); - off = att_addlength(off, thisatt->attlen, tp + off); + off = att_addlength_pointer(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) slow = true; /* can't use attcacheoff anymore */ @@ -1162,9 +1372,28 @@ heap_deformtuple(HeapTuple tuple, if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. + */ + if (!slow && + off == att_align_nominal(off, thisatt->attalign)) + thisatt->attcacheoff = off; + else + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + } else { - off = att_align(off, thisatt->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; @@ -1172,7 +1401,7 @@ heap_deformtuple(HeapTuple tuple, values[attnum] = fetchatt(thisatt, tp + off); - off = att_addlength(off, thisatt->attlen, tp + off); + off = att_addlength_pointer(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) slow = true; /* can't use attcacheoff anymore */ @@ -1252,9 +1481,28 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; + else if (thisatt->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. + */ + if (!slow && + off == att_align_nominal(off, thisatt->attalign)) + thisatt->attcacheoff = off; + else + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + slow = true; + } + } else { - off = att_align(off, thisatt->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; @@ -1262,7 +1510,7 @@ slot_deform_tuple(TupleTableSlot *slot, int natts) values[attnum] = fetchatt(thisatt, tp + off); - off = att_addlength(off, thisatt->attlen, tp + off); + off = att_addlength_pointer(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) slow = true; /* can't use attcacheoff anymore */ @@ -1543,7 +1791,7 @@ heap_form_minimal_tuple(TupleDesc tupleDescriptor, bool *isnull) { MinimalTuple tuple; /* return tuple */ - unsigned long len; + Size len, data_len; int hoff; bool hasnull = false; Form_pg_attribute *att = tupleDescriptor->attrs; @@ -1595,7 +1843,9 @@ heap_form_minimal_tuple(TupleDesc tupleDescriptor, hoff = len = MAXALIGN(len); /* align user data safely */ - len += heap_compute_data_size(tupleDescriptor, values, isnull); + data_len = heap_compute_data_size(tupleDescriptor, values, isnull); + + len += data_len; /* * Allocate and zero the space needed. @@ -1616,6 +1866,7 @@ heap_form_minimal_tuple(TupleDesc tupleDescriptor, values, isnull, (char *) tuple + hoff, + data_len, &tuple->t_infomask, (hasnull ? tuple->t_bits : NULL)); diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index c83e34834ca..471d28c28c4 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.81 2007/02/27 23:48:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/common/indextuple.c,v 1.82 2007/04/06 04:21:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,7 @@ index_form_tuple(TupleDesc tupleDescriptor, char *tp; /* tuple pointer */ IndexTuple tuple; /* return tuple */ Size size, + data_size, hoff; int i; unsigned short infomask = 0; @@ -74,9 +75,9 @@ index_form_tuple(TupleDesc tupleDescriptor, */ if (VARATT_IS_EXTERNAL(values[i])) { - untoasted_values[i] = PointerGetDatum( - heap_tuple_fetch_attr( - (varattrib *) DatumGetPointer(values[i]))); + untoasted_values[i] = + PointerGetDatum(heap_tuple_fetch_attr((struct varlena *) + DatumGetPointer(values[i]))); untoasted_free[i] = true; } @@ -84,8 +85,8 @@ index_form_tuple(TupleDesc tupleDescriptor, * If value is above size target, and is of a compressible datatype, * try to compress it in-line. */ - if (VARSIZE(untoasted_values[i]) > TOAST_INDEX_TARGET && - !VARATT_IS_EXTENDED(untoasted_values[i]) && + if (!VARATT_IS_EXTENDED(untoasted_values[i]) && + VARSIZE(untoasted_values[i]) > TOAST_INDEX_TARGET && (att->attstorage == 'x' || att->attstorage == 'm')) { Datum cvalue = toast_compress_datum(untoasted_values[i]); @@ -116,12 +117,13 @@ index_form_tuple(TupleDesc tupleDescriptor, hoff = IndexInfoFindDataOffset(infomask); #ifdef TOAST_INDEX_HACK - size = hoff + heap_compute_data_size(tupleDescriptor, - untoasted_values, isnull); + data_size = heap_compute_data_size(tupleDescriptor, + untoasted_values, isnull); #else - size = hoff + heap_compute_data_size(tupleDescriptor, - values, isnull); + data_size = heap_compute_data_size(tupleDescriptor, + values, isnull); #endif + size = hoff + data_size; size = MAXALIGN(size); /* be conservative */ tp = (char *) palloc0(size); @@ -135,6 +137,7 @@ index_form_tuple(TupleDesc tupleDescriptor, #endif isnull, (char *) tp + hoff, + data_size, &tupmask, (hasnull ? (bits8 *) tp + sizeof(IndexTupleData) : NULL)); @@ -201,17 +204,14 @@ nocache_index_getattr(IndexTuple tup, bool *isnull) { Form_pg_attribute *att = tupleDesc->attrs; - char *tp; /* ptr to att in tuple */ - bits8 *bp = NULL; /* ptr to null bitmask in tuple */ - bool slow = false; /* do we have to walk nulls? */ + char *tp; /* ptr to data part of tuple */ + bits8 *bp = NULL; /* ptr to null bitmap in tuple */ + bool slow = false; /* do we have to walk attrs? */ int data_off; /* tuple data offset */ + int off; /* current offset within data */ (void) isnull; /* not used */ - /* - * sanity checks - */ - /* ---------------- * Three cases: * @@ -237,7 +237,7 @@ nocache_index_getattr(IndexTuple tup, { #ifdef IN_MACRO /* This is handled in the macro */ - if (att[attnum]->attcacheoff != -1) + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], (char *) tup + data_off + @@ -295,21 +295,28 @@ nocache_index_getattr(IndexTuple tup, tp = (char *) tup + data_off; - /* - * now check for any non-fixed length attrs before our attribute - */ if (!slow) { - if (att[attnum]->attcacheoff != -1) + /* + * If we get here, there are no nulls up to and including the target + * attribute. If we have a cached offset, we can use it. + */ + if (att[attnum]->attcacheoff >= 0) { return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); } - else if (IndexTupleHasVarwidths(tup)) + + /* + * Otherwise, check for non-fixed-length attrs up to and including + * target. If there aren't any, it's safe to cheaply initialize + * the cached offsets for these attrs. + */ + if (IndexTupleHasVarwidths(tup)) { int j; - for (j = 0; j < attnum; j++) + for (j = 0; j <= attnum; j++) { if (att[j]->attlen <= 0) { @@ -320,80 +327,109 @@ nocache_index_getattr(IndexTuple tup, } } - /* - * If slow is false, and we got here, we know that we have a tuple with no - * nulls or var-widths before the target attribute. If possible, we also - * want to initialize the remainder of the attribute cached offset values. - */ if (!slow) { + int natts = tupleDesc->natts; int j = 1; - long off; /* - * need to set cache for some atts + * If we get here, we have a tuple with no nulls or var-widths up to + * and including the target attribute, so we can use the cached offset + * ... only we don't have it yet, or we'd not have got here. Since + * it's cheap to compute offsets for fixed-width columns, we take the + * opportunity to initialize the cached offsets for *all* the leading + * fixed-width columns, in hope of avoiding future visits to this + * routine. */ - att[0]->attcacheoff = 0; - while (j < attnum && att[j]->attcacheoff > 0) + /* we might have set some offsets in the slow path previously */ + while (j < natts && att[j]->attcacheoff > 0) j++; off = att[j - 1]->attcacheoff + att[j - 1]->attlen; - for (; j <= attnum; j++) + for (; j < natts; j++) { - off = att_align(off, att[j]->attalign); + if (att[j]->attlen <= 0) + break; + + off = att_align_nominal(off, att[j]->attalign); att[j]->attcacheoff = off; off += att[j]->attlen; } - return fetchatt(att[attnum], tp + att[attnum]->attcacheoff); + Assert(j > attnum); + + off = att[attnum]->attcacheoff; } else { bool usecache = true; - int off = 0; int i; /* - * Now we know that we have to walk the tuple CAREFULLY. + * Now we know that we have to walk the tuple CAREFULLY. But we + * still might be able to cache some offsets for next time. + * + * Note - This loop is a little tricky. For each non-null attribute, + * we have to first account for alignment padding before the attr, + * then advance over the attr based on its length. Nulls have no + * storage and no alignment padding either. We can use/set + * attcacheoff until we reach either a null or a var-width attribute. */ - - for (i = 0; i < attnum; i++) + off = 0; + for (i = 0; ; i++) /* loop exit is at "break" */ { - if (IndexTupleHasNulls(tup)) + if (IndexTupleHasNulls(tup) && att_isnull(i, bp)) { - if (att_isnull(i, bp)) - { - usecache = false; - continue; - } + usecache = false; + continue; /* this cannot be the target att */ } /* If we know the next offset, we can skip the rest */ - if (usecache && att[i]->attcacheoff != -1) + if (usecache && att[i]->attcacheoff >= 0) off = att[i]->attcacheoff; + else if (att[i]->attlen == -1) + { + /* + * We can only cache the offset for a varlena attribute + * if the offset is already suitably aligned, so that there + * would be no pad bytes in any case: then the offset will + * be valid for either an aligned or unaligned value. + */ + if (usecache && + off == att_align_nominal(off, att[i]->attalign)) + att[i]->attcacheoff = off; + else + { + off = att_align_pointer(off, att[i]->attalign, -1, + tp + off); + usecache = false; + } + } else { - off = att_align(off, att[i]->attalign); + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, att[i]->attalign); if (usecache) att[i]->attcacheoff = off; } - off = att_addlength(off, att[i]->attlen, tp + off); + if (i == attnum) + break; + + off = att_addlength_pointer(off, att[i]->attlen, tp + off); if (usecache && att[i]->attlen <= 0) usecache = false; } - - off = att_align(off, att[attnum]->attalign); - - return fetchatt(att[attnum], tp + off); } + + return fetchatt(att[attnum], tp + off); } /* diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 334d6700423..1a3c01bcac9 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.73 2007/04/03 04:14:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.74 2007/04/06 04:21:41 tgl Exp $ * * * INTERFACE ROUTINES @@ -42,25 +42,39 @@ #undef TOAST_DEBUG +/* + * Testing whether an externally-stored value is compressed now requires + * comparing extsize (the actual length of the external data) to rawsize + * (the original uncompressed datum's size). The latter includes VARHDRSZ + * overhead, the former doesn't. We never use compression unless it actually + * saves space, so we expect either equality or less-than. + */ +#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ + ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ) + static void toast_delete_datum(Relation rel, Datum value); static Datum toast_save_datum(Relation rel, Datum value, bool use_wal, bool use_fsm); -static varattrib *toast_fetch_datum(varattrib *attr); -static varattrib *toast_fetch_datum_slice(varattrib *attr, +static struct varlena *toast_fetch_datum(struct varlena *attr); +static struct varlena *toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length); /* ---------- * heap_tuple_fetch_attr - * - * Public entry point to get back a toasted value + * Public entry point to get back a toasted value from * external storage (possibly still in compressed format). - * ---------- + * + * This will return a datum that contains all the data internally, ie, not + * relying on external storage, but it can still be compressed or have a short + * header. + ---------- */ -varattrib * -heap_tuple_fetch_attr(varattrib *attr) +struct varlena * +heap_tuple_fetch_attr(struct varlena *attr) { - varattrib *result; + struct varlena *result; if (VARATT_IS_EXTERNAL(attr)) { @@ -88,35 +102,25 @@ heap_tuple_fetch_attr(varattrib *attr) * or external storage. * ---------- */ -varattrib * -heap_tuple_untoast_attr(varattrib *attr) +struct varlena * +heap_tuple_untoast_attr(struct varlena *attr) { - varattrib *result; - if (VARATT_IS_EXTERNAL(attr)) { + /* + * This is an externally stored datum --- fetch it back from there + */ + attr = toast_fetch_datum(attr); + /* If it's compressed, decompress it */ if (VARATT_IS_COMPRESSED(attr)) { - /* ---------- - * This is an external stored compressed value - * Fetch it from the toast heap and decompress. - * ---------- - */ - PGLZ_Header *tmp; + PGLZ_Header *tmp = (PGLZ_Header *) attr; - tmp = (PGLZ_Header *) toast_fetch_datum(attr); - result = (varattrib *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(result, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - pglz_decompress(tmp, VARDATA(result)); + attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + pglz_decompress(tmp, VARDATA(attr)); pfree(tmp); } - else - { - /* - * This is an external stored plain value - */ - result = toast_fetch_datum(attr); - } } else if (VARATT_IS_COMPRESSED(attr)) { @@ -125,18 +129,26 @@ heap_tuple_untoast_attr(varattrib *attr) */ PGLZ_Header *tmp = (PGLZ_Header *) attr; - result = (varattrib *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(result, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - pglz_decompress(tmp, VARDATA(result)); + attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + pglz_decompress(tmp, VARDATA(attr)); } - else - + else if (VARATT_IS_SHORT(attr)) + { /* - * This is a plain value inside of the main tuple - why am I called? + * This is a short-header varlena --- convert to 4-byte header format */ - return attr; + Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; + Size new_size = data_size + VARHDRSZ; + struct varlena *new_attr; + + new_attr = (struct varlena *) palloc(new_size); + SET_VARSIZE(new_attr, new_size); + memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); + attr = new_attr; + } - return result; + return attr; } @@ -147,44 +159,57 @@ heap_tuple_untoast_attr(varattrib *attr) * from compression or external storage. * ---------- */ -varattrib * -heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelength) +struct varlena * +heap_tuple_untoast_attr_slice(struct varlena *attr, + int32 sliceoffset, int32 slicelength) { - varattrib *preslice; - varattrib *result; + struct varlena *preslice; + struct varlena *result; + char *attrdata; int32 attrsize; - if (VARATT_IS_COMPRESSED(attr)) + if (VARATT_IS_EXTERNAL(attr)) { - PGLZ_Header *tmp; + struct varatt_external toast_pointer; - if (VARATT_IS_EXTERNAL(attr)) - tmp = (PGLZ_Header *) toast_fetch_datum(attr); - else - tmp = (PGLZ_Header *) attr; /* compressed in main tuple */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), sizeof(toast_pointer)); + + /* fast path for non-compressed external datums */ + if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - preslice = (varattrib *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(preslice, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); + /* fetch it back (compressed marker will get set automatically) */ + preslice = toast_fetch_datum(attr); + } + else + preslice = attr; + + if (VARATT_IS_COMPRESSED(preslice)) + { + PGLZ_Header *tmp = (PGLZ_Header *) preslice; + Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ; + + preslice = (struct varlena *) palloc(size); + SET_VARSIZE(preslice, size); pglz_decompress(tmp, VARDATA(preslice)); if (tmp != (PGLZ_Header *) attr) pfree(tmp); } + + if (VARATT_IS_SHORT(preslice)) + { + attrdata = VARDATA_SHORT(preslice); + attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT; + } else { - /* Plain value */ - if (VARATT_IS_EXTERNAL(attr)) - { - /* fast path */ - return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - } - else - preslice = attr; + attrdata = VARDATA(preslice); + attrsize = VARSIZE(preslice) - VARHDRSZ; } /* slicing of datum for compressed cases and plain value */ - attrsize = VARSIZE(preslice) - VARHDRSZ; if (sliceoffset >= attrsize) { sliceoffset = 0; @@ -194,10 +219,10 @@ heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelen if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) slicelength = attrsize - sliceoffset; - result = (varattrib *) palloc(slicelength + VARHDRSZ); + result = (struct varlena *) palloc(slicelength + VARHDRSZ); SET_VARSIZE(result, slicelength + VARHDRSZ); - memcpy(VARDATA(result), VARDATA(preslice) + sliceoffset, slicelength); + memcpy(VARDATA(result), attrdata + sliceoffset, slicelength); if (preslice != attr) pfree(preslice); @@ -210,29 +235,35 @@ heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelen * toast_raw_datum_size - * * Return the raw (detoasted) size of a varlena datum + * (including the VARHDRSZ header) * ---------- */ Size toast_raw_datum_size(Datum value) { - varattrib *attr = (varattrib *) DatumGetPointer(value); + struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_COMPRESSED(attr)) + if (VARATT_IS_EXTERNAL(attr)) { - /* - * va_rawsize shows the original data size, whether the datum is - * external or not. - */ - result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ; + /* va_rawsize is the size of the original datum -- including header */ + struct varatt_external toast_pointer; + + memcpy(&toast_pointer, VARDATA_SHORT(attr), sizeof(toast_pointer)); + result = toast_pointer.va_rawsize; } - else if (VARATT_IS_EXTERNAL(attr)) + else if (VARATT_IS_COMPRESSED(attr)) + { + /* here, va_rawsize is just the payload size */ + result = VARRAWSIZE_4B_C(attr) + VARHDRSZ; + } + else if (VARATT_IS_SHORT(attr)) { /* - * an uncompressed external attribute has rawsize including the header - * (not too consistent!) + * we have to normalize the header length to VARHDRSZ or else the + * callers of this function will be confused. */ - result = attr->va_content.va_external.va_rawsize; + result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ; } else { @@ -251,7 +282,7 @@ toast_raw_datum_size(Datum value) Size toast_datum_size(Datum value) { - varattrib *attr = (varattrib *) DatumGetPointer(value); + struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) @@ -261,7 +292,14 @@ toast_datum_size(Datum value) * compressed or not. We do not count the size of the toast pointer * ... should we? */ - result = attr->va_content.va_external.va_extsize; + struct varatt_external toast_pointer; + + memcpy(&toast_pointer, VARDATA_SHORT(attr), sizeof(toast_pointer)); + result = toast_pointer.va_extsize; + } + else if (VARATT_IS_SHORT(attr)) + { + result = VARSIZE_SHORT(attr); } else { @@ -413,16 +451,16 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, for (i = 0; i < numAttrs; i++) { - varattrib *old_value; - varattrib *new_value; + struct varlena *old_value; + struct varlena *new_value; if (oldtup != NULL) { /* * For UPDATE get the old and new values of this attribute */ - old_value = (varattrib *) DatumGetPointer(toast_oldvalues[i]); - new_value = (varattrib *) DatumGetPointer(toast_values[i]); + old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]); + new_value = (struct varlena *) DatumGetPointer(toast_values[i]); /* * If the old value is an external stored one, check if it has @@ -432,10 +470,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, VARATT_IS_EXTERNAL(old_value)) { if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) || - old_value->va_content.va_external.va_valueid != - new_value->va_content.va_external.va_valueid || - old_value->va_content.va_external.va_toastrelid != - new_value->va_content.va_external.va_toastrelid) + memcmp(VARDATA_SHORT(old_value), + VARDATA_SHORT(new_value), + sizeof(struct varatt_external)) != 0) { /* * The old external stored value isn't needed any more @@ -452,7 +489,6 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, * tuple. */ toast_action[i] = 'p'; - toast_sizes[i] = VARSIZE(toast_values[i]); continue; } } @@ -462,7 +498,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, /* * For INSERT simply get the new value */ - new_value = (varattrib *) DatumGetPointer(toast_values[i]); + new_value = (struct varlena *) DatumGetPointer(toast_values[i]); } /* @@ -503,7 +539,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, /* * Remember the size of this attribute */ - toast_sizes[i] = VARSIZE(new_value); + toast_sizes[i] = VARSIZE_ANY(new_value); } else { @@ -542,7 +578,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, toast_values, toast_isnull) > maxDataLen) { int biggest_attno = -1; - int32 biggest_size = MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; Datum new_value; @@ -553,7 +589,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, { if (toast_action[i] != ' ') continue; - if (VARATT_IS_EXTENDED(toast_values[i])) + if (VARATT_IS_EXTERNAL(toast_values[i])) + continue; + if (VARATT_IS_COMPRESSED(toast_values[i])) continue; if (att[i]->attstorage != 'x') continue; @@ -603,7 +641,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; - int32 biggest_size = MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; /*------ @@ -639,9 +677,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, use_wal, use_fsm); if (toast_free[i]) pfree(DatumGetPointer(old_value)); - toast_free[i] = true; - toast_sizes[i] = VARSIZE(toast_values[i]); need_change = true; need_free = true; @@ -655,7 +691,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, toast_values, toast_isnull) > maxDataLen) { int biggest_attno = -1; - int32 biggest_size = MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; Datum new_value; @@ -666,7 +702,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, { if (toast_action[i] != ' ') continue; - if (VARATT_IS_EXTENDED(toast_values[i])) + if (VARATT_IS_EXTERNAL(toast_values[i])) + continue; + if (VARATT_IS_COMPRESSED(toast_values[i])) continue; if (att[i]->attstorage != 'm') continue; @@ -715,7 +753,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; - int32 biggest_size = MAXALIGN(sizeof(varattrib)); + int32 biggest_size = MAXALIGN(sizeof(varattrib_pointer)); Datum old_value; /*-------- @@ -768,6 +806,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, HeapTupleHeader olddata = newtup->t_data; HeapTupleHeader new_data; int32 new_len; + int32 new_data_len; /* * Calculate the new size of the tuple. Header size should not @@ -780,8 +819,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, new_len += sizeof(Oid); new_len = MAXALIGN(new_len); Assert(new_len == olddata->t_hoff); - new_len += heap_compute_data_size(tupleDesc, - toast_values, toast_isnull); + new_data_len = heap_compute_data_size(tupleDesc, + toast_values, toast_isnull); + new_len += new_data_len; /* * Allocate and zero the space needed, and fill HeapTupleData fields. @@ -802,6 +842,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, toast_values, toast_isnull, (char *) new_data + olddata->t_hoff, + new_data_len, &(new_data->t_infomask), has_nulls ? new_data->t_bits : NULL); } @@ -835,6 +876,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, * This must be invoked on any potentially-composite field that is to be * inserted into a tuple. Doing this preserves the invariant that toasting * goes only one level deep in a tuple. + * + * Note that flattening does not mean expansion of short-header varlenas, + * so in one sense toasting is allowed within composite datums. * ---------- */ Datum @@ -845,6 +889,7 @@ toast_flatten_tuple_attribute(Datum value, HeapTupleHeader olddata; HeapTupleHeader new_data; int32 new_len; + int32 new_data_len; HeapTupleData tmptup; Form_pg_attribute *att; int numAttrs; @@ -891,10 +936,11 @@ toast_flatten_tuple_attribute(Datum value, has_nulls = true; else if (att[i]->attlen == -1) { - varattrib *new_value; + struct varlena *new_value; - new_value = (varattrib *) DatumGetPointer(toast_values[i]); - if (VARATT_IS_EXTENDED(new_value)) + new_value = (struct varlena *) DatumGetPointer(toast_values[i]); + if (VARATT_IS_EXTERNAL(new_value) || + VARATT_IS_COMPRESSED(new_value)) { new_value = heap_tuple_untoast_attr(new_value); toast_values[i] = PointerGetDatum(new_value); @@ -924,7 +970,9 @@ toast_flatten_tuple_attribute(Datum value, new_len += sizeof(Oid); new_len = MAXALIGN(new_len); Assert(new_len == olddata->t_hoff); - new_len += heap_compute_data_size(tupleDesc, toast_values, toast_isnull); + new_data_len = heap_compute_data_size(tupleDesc, + toast_values, toast_isnull); + new_len += new_data_len; new_data = (HeapTupleHeader) palloc0(new_len); @@ -939,6 +987,7 @@ toast_flatten_tuple_attribute(Datum value, toast_values, toast_isnull, (char *) new_data + olddata->t_hoff, + new_data_len, &(new_data->t_infomask), has_nulls ? new_data->t_bits : NULL); @@ -962,21 +1011,26 @@ toast_flatten_tuple_attribute(Datum value, * If we fail (ie, compressed result is actually bigger than original) * then return NULL. We must not use compressed data if it'd expand * the tuple! + * + * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without + * copying them. But we can't handle external or compressed datums. * ---------- */ Datum toast_compress_datum(Datum value) { - varattrib *tmp; - int32 valsize = VARSIZE(value) - VARHDRSZ; + struct varlena *tmp; + int32 valsize = VARSIZE_ANY_EXHDR(value); - tmp = (varattrib *) palloc(PGLZ_MAX_OUTPUT(valsize)); - if (pglz_compress(VARDATA(value), valsize, + Assert(!VARATT_IS_EXTERNAL(value)); + Assert(!VARATT_IS_COMPRESSED(value)); + + tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize)); + if (pglz_compress(VARDATA_ANY(value), valsize, (PGLZ_Header *) tmp, PGLZ_strategy_default) && - VARSIZE(tmp) < VARSIZE(value)) + VARSIZE(tmp) < VARSIZE_ANY(value)) { /* successful compression */ - VARATT_SIZEP_DEPRECATED(tmp) |= VARATT_FLAG_COMPRESSED; return PointerGetDatum(tmp); } else @@ -992,7 +1046,7 @@ toast_compress_datum(Datum value) * toast_save_datum - * * Save one single datum into the secondary relation and return - * a varattrib reference for it. + * a Datum reference for it. * ---------- */ static Datum @@ -1006,7 +1060,8 @@ toast_save_datum(Relation rel, Datum value, Datum t_values[3]; bool t_isnull[3]; CommandId mycid = GetCurrentCommandId(); - varattrib *result; + struct varlena *result; + struct varatt_external toast_pointer; struct { struct varlena hdr; @@ -1027,44 +1082,51 @@ toast_save_datum(Relation rel, Datum value, toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock); /* - * Create the varattrib reference + * Get the data pointer and length, and compute va_rawsize and va_extsize. + * + * va_rawsize is the size of the equivalent fully uncompressed datum, + * so we have to adjust for short headers. + * + * va_extsize is the actual size of the data payload in the toast records. */ - result = (varattrib *) palloc(sizeof(varattrib)); - - SET_VARSIZE(result, sizeof(varattrib)); - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_EXTERNAL; - if (VARATT_IS_COMPRESSED(value)) + if (VARATT_IS_SHORT(value)) + { + data_p = VARDATA_SHORT(value); + data_todo = VARSIZE_SHORT(value) - VARHDRSZ_SHORT; + toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */ + toast_pointer.va_extsize = data_todo; + } + else if (VARATT_IS_COMPRESSED(value)) { - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_COMPRESSED; - result->va_content.va_external.va_rawsize = - ((varattrib *) value)->va_content.va_compressed.va_rawsize; + data_p = VARDATA(value); + data_todo = VARSIZE(value) - VARHDRSZ; + /* rawsize in a compressed datum is just the size of the payload */ + toast_pointer.va_rawsize = VARRAWSIZE_4B_C(value) + VARHDRSZ; + toast_pointer.va_extsize = data_todo; + /* Assert that the numbers look like it's compressed */ + Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); } else - result->va_content.va_external.va_rawsize = VARSIZE(value); + { + data_p = VARDATA(value); + data_todo = VARSIZE(value) - VARHDRSZ; + toast_pointer.va_rawsize = VARSIZE(value); + toast_pointer.va_extsize = data_todo; + } - result->va_content.va_external.va_extsize = - VARSIZE(value) - VARHDRSZ; - result->va_content.va_external.va_valueid = - GetNewOidWithIndex(toastrel, toastidx); - result->va_content.va_external.va_toastrelid = - rel->rd_rel->reltoastrelid; + toast_pointer.va_valueid = GetNewOidWithIndex(toastrel, toastidx); + toast_pointer.va_toastrelid = rel->rd_rel->reltoastrelid; /* * Initialize constant parts of the tuple data */ - t_values[0] = ObjectIdGetDatum(result->va_content.va_external.va_valueid); + t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid); t_values[2] = PointerGetDatum(&chunk_data); t_isnull[0] = false; t_isnull[1] = false; t_isnull[2] = false; /* - * Get the data to process - */ - data_p = VARDATA(value); - data_todo = VARSIZE(value) - VARHDRSZ; - - /* * Split up the item into chunks */ while (data_todo > 0) @@ -1111,11 +1173,18 @@ toast_save_datum(Relation rel, Datum value, } /* - * Done - close toast relation and return the reference + * Done - close toast relation */ index_close(toastidx, RowExclusiveLock); heap_close(toastrel, RowExclusiveLock); + /* + * Create the TOAST pointer value that we'll return + */ + result = (struct varlena *) palloc(sizeof(varattrib_pointer)); + SET_VARSIZE_EXTERNAL(result); + memcpy(VARDATA_SHORT(result), &toast_pointer, sizeof(toast_pointer)); + return PointerGetDatum(result); } @@ -1129,7 +1198,8 @@ toast_save_datum(Relation rel, Datum value, static void toast_delete_datum(Relation rel, Datum value) { - varattrib *attr = (varattrib *) DatumGetPointer(value); + struct varlena *attr = (struct varlena *) DatumGetPointer(value); + struct varatt_external toast_pointer; Relation toastrel; Relation toastidx; ScanKeyData toastkey; @@ -1139,11 +1209,14 @@ toast_delete_datum(Relation rel, Datum value) if (!VARATT_IS_EXTERNAL(attr)) return; + /* Must copy to access aligned fields */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), + sizeof(struct varatt_external)); + /* * Open the toast relation and its index */ - toastrel = heap_open(attr->va_content.va_external.va_toastrelid, - RowExclusiveLock); + toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock); toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock); /* @@ -1153,7 +1226,7 @@ toast_delete_datum(Relation rel, Datum value) ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Find the chunks by index @@ -1180,12 +1253,12 @@ toast_delete_datum(Relation rel, Datum value) /* ---------- * toast_fetch_datum - * - * Reconstruct an in memory varattrib from the chunks saved + * Reconstruct an in memory Datum from the chunks saved * in the toast relation * ---------- */ -static varattrib * -toast_fetch_datum(varattrib *attr) +static struct varlena * +toast_fetch_datum(struct varlena *attr) { Relation toastrel; Relation toastidx; @@ -1193,28 +1266,35 @@ toast_fetch_datum(varattrib *attr) IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; - varattrib *result; + struct varlena *result; + struct varatt_external toast_pointer; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; + char *chunkdata; int32 chunksize; - ressize = attr->va_content.va_external.va_extsize; + /* Must copy to access aligned fields */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), + sizeof(struct varatt_external)); + + ressize = toast_pointer.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; - result = (varattrib *) palloc(ressize + VARHDRSZ); - SET_VARSIZE(result, ressize + VARHDRSZ); - if (VARATT_IS_COMPRESSED(attr)) - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_COMPRESSED; + result = (struct varlena *) palloc(ressize + VARHDRSZ); + + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ); + else + SET_VARSIZE(result, ressize + VARHDRSZ); /* * Open the toast relation and its index */ - toastrel = heap_open(attr->va_content.va_external.va_toastrelid, - AccessShareLock); + toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); @@ -1224,7 +1304,7 @@ toast_fetch_datum(varattrib *attr) ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Read the chunks by index @@ -1246,7 +1326,24 @@ toast_fetch_datum(varattrib *attr) Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); - chunksize = VARSIZE(chunk) - VARHDRSZ; + if (!VARATT_IS_EXTENDED(chunk)) + { + chunksize = VARSIZE(chunk) - VARHDRSZ; + chunkdata = VARDATA(chunk); + } + else if (VARATT_IS_SHORT(chunk)) + { + /* could happen due to heap_form_tuple doing its thing */ + chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; + chunkdata = VARDATA_SHORT(chunk); + } + else + { + /* should never happen */ + elog(ERROR, "found toasted toast chunk"); + chunksize = 0; /* keep compiler quiet */ + chunkdata = NULL; + } /* * Some checks on the data we've found @@ -1254,31 +1351,35 @@ toast_fetch_datum(varattrib *attr) if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u", + chunksize, (int) TOAST_MAX_CHUNK_SIZE, + residx, numchunks, + toast_pointer.va_valueid); } - else if (residx < numchunks) + else if (residx == numchunks-1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u", + chunksize, + (int) (ressize - residx*TOAST_MAX_CHUNK_SIZE), + residx, + toast_pointer.va_valueid); } else - elog(ERROR, "unexpected chunk number %d for toast value %u", + elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)", residx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid, + 0, numchunks-1); /* * Copy the data into proper place in our result */ memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE, - VARDATA(chunk), + chunkdata, chunksize); nextidx++; @@ -1290,7 +1391,7 @@ toast_fetch_datum(varattrib *attr) if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); /* * End scan and close relations @@ -1305,12 +1406,12 @@ toast_fetch_datum(varattrib *attr) /* ---------- * toast_fetch_datum_slice - * - * Reconstruct a segment of a varattrib from the chunks saved + * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * ---------- */ -static varattrib * -toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) +static struct varlena * +toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) { Relation toastrel; Relation toastidx; @@ -1319,7 +1420,8 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; - varattrib *result; + struct varlena *result; + struct varatt_external toast_pointer; int32 attrsize; int32 residx; int32 nextidx; @@ -1331,11 +1433,16 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) int totalchunks; Pointer chunk; bool isnull; + char *chunkdata; int32 chunksize; int32 chcpystrt; int32 chcpyend; - attrsize = attr->va_content.va_external.va_extsize; + /* Must copy to access aligned fields */ + memcpy(&toast_pointer, VARDATA_SHORT(attr), + sizeof(struct varatt_external)); + + attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; if (sliceoffset >= attrsize) @@ -1347,11 +1454,12 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; - result = (varattrib *) palloc(length + VARHDRSZ); - SET_VARSIZE(result, length + VARHDRSZ); + result = (struct varlena *) palloc(length + VARHDRSZ); - if (VARATT_IS_COMPRESSED(attr)) - VARATT_SIZEP_DEPRECATED(result) |= VARATT_FLAG_COMPRESSED; + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); + else + SET_VARSIZE(result, length + VARHDRSZ); if (length == 0) return result; /* Can save a lot of work at this point! */ @@ -1366,8 +1474,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) /* * Open the toast relation and its index */ - toastrel = heap_open(attr->va_content.va_external.va_toastrelid, - AccessShareLock); + toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); @@ -1378,7 +1485,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) ScanKeyInit(&toastkey[0], (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); + ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Use equality condition for one chunk, a range condition otherwise: @@ -1421,7 +1528,24 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); - chunksize = VARSIZE(chunk) - VARHDRSZ; + if (!VARATT_IS_EXTENDED(chunk)) + { + chunksize = VARSIZE(chunk) - VARHDRSZ; + chunkdata = VARDATA(chunk); + } + else if (VARATT_IS_SHORT(chunk)) + { + /* could happen due to heap_form_tuple doing its thing */ + chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; + chunkdata = VARDATA_SHORT(chunk); + } + else + { + /* should never happen */ + elog(ERROR, "found toasted toast chunk"); + chunksize = 0; /* keep compiler quiet */ + chunkdata = NULL; + } /* * Some checks on the data we've found @@ -1429,21 +1553,29 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk)) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); if (residx < totalchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u when fetching slice", + chunksize, (int) TOAST_MAX_CHUNK_SIZE, + residx, totalchunks, + toast_pointer.va_valueid); } - else + else if (residx == totalchunks-1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize) - elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", - chunksize, residx, - attr->va_content.va_external.va_valueid); + elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u when fetching slice", + chunksize, + (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE), + residx, + toast_pointer.va_valueid); } + else + elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)", + residx, + toast_pointer.va_valueid, + 0, totalchunks-1); /* * Copy the data into proper place in our result @@ -1457,7 +1589,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) memcpy(VARDATA(result) + (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, - VARDATA(chunk) + chcpystrt, + chunkdata + chcpystrt, (chcpyend - chcpystrt) + 1); nextidx++; @@ -1469,7 +1601,7 @@ toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) if (nextidx != (endchunk + 1)) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, - attr->va_content.va_external.va_valueid); + toast_pointer.va_valueid); /* * End scan and close relations |