diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2015-05-14 12:08:40 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2015-05-14 12:08:49 -0400 |
commit | 1dc5ebc9077ab742079ce5dac9a6664248d42916 (patch) | |
tree | 68aa827a8be94c16b456d8f78263507fcff9ee4a /src/backend/utils/adt/datum.c | |
parent | 8a2e1edd2ba0817313c1c0ef76b03a5ab819d17f (diff) | |
download | postgresql-1dc5ebc9077ab742079ce5dac9a6664248d42916.tar.gz postgresql-1dc5ebc9077ab742079ce5dac9a6664248d42916.zip |
Support "expanded" objects, particularly arrays, for better performance.
This patch introduces the ability for complex datatypes to have an
in-memory representation that is different from their on-disk format.
On-disk formats are typically optimized for minimal size, and in any case
they can't contain pointers, so they are often not well-suited for
computation. Now a datatype can invent an "expanded" in-memory format
that is better suited for its operations, and then pass that around among
the C functions that operate on the datatype. There are also provisions
(rudimentary as yet) to allow an expanded object to be modified in-place
under suitable conditions, so that operations like assignment to an element
of an array need not involve copying the entire array.
The initial application for this feature is arrays, but it is not hard
to foresee using it for other container types like JSON, XML and hstore.
I have hopes that it will be useful to PostGIS as well.
In this initial implementation, a few heuristics have been hard-wired
into plpgsql to improve performance for arrays that are stored in
plpgsql variables. We would like to generalize those hacks so that
other datatypes can obtain similar improvements, but figuring out some
appropriate APIs is left as a task for future work. (The heuristics
themselves are probably not optimal yet, either, as they sometimes
force expansion of arrays that would be better left alone.)
Preliminary performance testing shows impressive speed gains for plpgsql
functions that do element-by-element access or update of large arrays.
There are other cases that get a little slower, as a result of added array
format conversions; but we can hope to improve anything that's annoyingly
bad. In any case most applications should see a net win.
Tom Lane, reviewed by Andres Freund
Diffstat (limited to 'src/backend/utils/adt/datum.c')
-rw-r--r-- | src/backend/utils/adt/datum.c | 86 |
1 file changed, 63 insertions, 23 deletions
```diff
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c
index 014eca5185c..e8af0304c0b 100644
--- a/src/backend/utils/adt/datum.c
+++ b/src/backend/utils/adt/datum.c
@@ -12,8 +12,9 @@
  *
  *-------------------------------------------------------------------------
  */
+
 /*
- * In the implementation of the next routines we assume the following:
+ * In the implementation of these routines we assume the following:
  *
  * A) if a type is "byVal" then all the information is stored in the
  * Datum itself (i.e. no pointers involved!). In this case the
@@ -34,11 +35,15 @@
  *
  * Note that we do not treat "toasted" datums specially; therefore what
  * will be copied or compared is the compressed data or toast reference.
+ * An exception is made for datumCopy() of an expanded object, however,
+ * because most callers expect to get a simple contiguous (and pfree'able)
+ * result from datumCopy(). See also datumTransfer().
  */
 
 #include "postgres.h"
 
 #include "utils/datum.h"
+#include "utils/expandeddatum.h"
 
 
 /*-------------------------------------------------------------------------
@@ -46,6 +51,7 @@
  *
  * Find the "real" size of a datum, given the datum value,
  * whether it is a "by value", and the declared type length.
+ * (For TOAST pointer datums, this is the size of the pointer datum.)
  *
  * This is essentially an out-of-line version of the att_addlength_datum()
  * macro in access/tupmacs.h. We do a tad more error checking though.
@@ -106,9 +112,16 @@ datumGetSize(Datum value, bool typByVal, int typLen)
 /*-------------------------------------------------------------------------
  * datumCopy
  *
- * make a copy of a datum
+ * Make a copy of a non-NULL datum.
  *
  * If the datatype is pass-by-reference, memory is obtained with palloc().
+ *
+ * If the value is a reference to an expanded object, we flatten into memory
+ * obtained with palloc(). We need to copy because one of the main uses of
+ * this function is to copy a datum out of a transient memory context that's
+ * about to be destroyed, and the expanded object is probably in a child
+ * context that will also go away. Moreover, many callers assume that the
+ * result is a single pfree-able chunk.
  *-------------------------------------------------------------------------
  */
 Datum
@@ -118,44 +131,71 @@ datumCopy(Datum value, bool typByVal, int typLen)
 
 	if (typByVal)
 		res = value;
+	else if (typLen == -1)
+	{
+		/* It is a varlena datatype */
+		struct varlena *vl = (struct varlena *) DatumGetPointer(value);
+
+		if (VARATT_IS_EXTERNAL_EXPANDED(vl))
+		{
+			/* Flatten into the caller's memory context */
+			ExpandedObjectHeader *eoh = DatumGetEOHP(value);
+			Size resultsize;
+			char *resultptr;
+
+			resultsize = EOH_get_flat_size(eoh);
+			resultptr = (char *) palloc(resultsize);
+			EOH_flatten_into(eoh, (void *) resultptr, resultsize);
+			res = PointerGetDatum(resultptr);
+		}
+		else
+		{
+			/* Otherwise, just copy the varlena datum verbatim */
+			Size realSize;
+			char *resultptr;
+
+			realSize = (Size) VARSIZE_ANY(vl);
+			resultptr = (char *) palloc(realSize);
+			memcpy(resultptr, vl, realSize);
+			res = PointerGetDatum(resultptr);
+		}
+	}
 	else
 	{
+		/* Pass by reference, but not varlena, so not toasted */
 		Size realSize;
-		char *s;
-
-		if (DatumGetPointer(value) == NULL)
-			return PointerGetDatum(NULL);
+		char *resultptr;
 
 		realSize = datumGetSize(value, typByVal, typLen);
 
-		s = (char *) palloc(realSize);
-		memcpy(s, DatumGetPointer(value), realSize);
-		res = PointerGetDatum(s);
+		resultptr = (char *) palloc(realSize);
+		memcpy(resultptr, DatumGetPointer(value), realSize);
+		res = PointerGetDatum(resultptr);
 	}
 	return res;
 }
 
 /*-------------------------------------------------------------------------
- * datumFree
+ * datumTransfer
  *
- * Free the space occupied by a datum CREATED BY "datumCopy"
+ * Transfer a non-NULL datum into the current memory context.
  *
- * NOTE: DO NOT USE THIS ROUTINE with datums returned by heap_getattr() etc.
- * ONLY datums created by "datumCopy" can be freed!
+ * This is equivalent to datumCopy() except when the datum is a read-write
+ * pointer to an expanded object. In that case we merely reparent the object
+ * into the current context, and return its standard R/W pointer (in case the
+ * given one is a transient pointer of shorter lifespan).
  *-------------------------------------------------------------------------
  */
-#ifdef NOT_USED
-void
-datumFree(Datum value, bool typByVal, int typLen)
+Datum
+datumTransfer(Datum value, bool typByVal, int typLen)
 {
-	if (!typByVal)
-	{
-		Pointer s = DatumGetPointer(value);
-
-		pfree(s);
-	}
+	if (!typByVal && typLen == -1 &&
+		VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value)))
+		value = TransferExpandedObject(value, CurrentMemoryContext);
+	else
+		value = datumCopy(value, typByVal, typLen);
+	return value;
 }
-#endif
 
 /*-------------------------------------------------------------------------
  * datumIsEqual
```