diff options
Diffstat (limited to 'src/include/utils/jsonb.h')
-rw-r--r-- | src/include/utils/jsonb.h | 125 |
1 files changed, 81 insertions, 44 deletions
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h index 91e3e140b4d..b89e4cb92cc 100644 --- a/src/include/utils/jsonb.h +++ b/src/include/utils/jsonb.h @@ -83,9 +83,9 @@ typedef struct JsonbValue JsonbValue; * buffer is accessed, but they can also be deep copied and passed around. * * Jsonb is a tree structure. Each node in the tree consists of a JEntry - * header, and a variable-length content. The JEntry header indicates what - * kind of a node it is, e.g. a string or an array, and the offset and length - * of its variable-length portion within the container. + * header and a variable-length content (possibly of zero size). The JEntry + * header indicates what kind of a node it is, e.g. a string or an array, + * and provides the length of its variable-length portion. * * The JEntry and the content of a node are not stored physically together. * Instead, the container array or object has an array that holds the JEntrys @@ -95,40 +95,52 @@ typedef struct JsonbValue JsonbValue; * hold its JEntry. Hence, no JEntry header is stored for the root node. It * is implicitly known that the root node must be an array or an object, * so we can get away without the type indicator as long as we can distinguish - * the two. For that purpose, both an array and an object begins with a uint32 + * the two. For that purpose, both an array and an object begin with a uint32 * header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked * scalar value needs to be stored as a Jsonb value, what we actually store is * an array with one element, with the flags in the array's header field set * to JB_FSCALAR | JB_FARRAY. * - * To encode the length and offset of the variable-length portion of each - * node in a compact way, the JEntry stores only the end offset within the - * variable-length portion of the container node. For the first JEntry in the - * container's JEntry array, that equals to the length of the node data. The - * begin offset and length of the rest of the entries can be calculated using - * the end offset of the previous JEntry in the array. - * * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct, * the variable-length portion of some node types is aligned to a 4-byte * boundary, while others are not. When alignment is needed, the padding is * in the beginning of the node that requires it. For example, if a numeric * node is stored after a string node, so that the numeric node begins at * offset 3, the variable-length portion of the numeric node will begin with - * one padding byte. + * one padding byte so that the actual numeric data is 4-byte aligned. */ /* - * Jentry format. + * JEntry format. + * + * The least significant 28 bits store either the data length of the entry, + * or its end+1 offset from the start of the variable-length portion of the + * containing object. The next three bits store the type of the entry, and + * the high-order bit tells whether the least significant bits store a length + * or an offset. + * + * The reason for the offset-or-length complication is to compromise between + * access speed and data compressibility. In the initial design each JEntry + * always stored an offset, but this resulted in JEntry arrays with horrible + * compressibility properties, so that TOAST compression of a JSONB did not + * work well. Storing only lengths would greatly improve compressibility, + * but it makes random access into large arrays expensive (O(N) not O(1)). + * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and + * a length in the rest. This results in reasonably compressible data (as + * long as the stride isn't too small). We may have to examine as many as + * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any + * given item, but that's still O(1) no matter how large the container is. * - * The least significant 28 bits store the end offset of the entry (see - * JBE_ENDPOS, JBE_OFF, JBE_LEN macros below). The next three bits - * are used to store the type of the entry. The most significant bit - * is unused, and should be set to zero. + * We could avoid eating a flag bit for this purpose if we were to store + * the stride in the container header, or if we were willing to treat the + * stride as an unchangeable constant. Neither of those options is very + * attractive though. */ typedef uint32 JEntry; -#define JENTRY_POSMASK 0x0FFFFFFF +#define JENTRY_OFFLENMASK 0x0FFFFFFF #define JENTRY_TYPEMASK 0x70000000 +#define JENTRY_HAS_OFF 0x80000000 /* values stored in the type bits */ #define JENTRY_ISSTRING 0x00000000 @@ -138,7 +150,9 @@ typedef uint32 JEntry; #define JENTRY_ISNULL 0x40000000 #define JENTRY_ISCONTAINER 0x50000000 /* array or object */ -/* Note possible multiple evaluations */ +/* Access macros. Note possible multiple evaluations */ +#define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK) +#define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0) #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING) #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC) #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER) @@ -147,20 +161,34 @@ typedef uint32 JEntry; #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE) #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_)) +/* Macro for advancing an offset variable to the next JEntry */ +#define JBE_ADVANCE_OFFSET(offset, je) \ + do { \ + JEntry je_ = (je); \ + if (JBE_HAS_OFF(je_)) \ + (offset) = JBE_OFFLENFLD(je_); \ + else \ + (offset) += JBE_OFFLENFLD(je_); \ + } while(0) + /* - * Macros for getting the offset and length of an element. Note multiple - * evaluations and access to prior array element. + * We store an offset, not a length, every JB_OFFSET_STRIDE children. + * Caution: this macro should only be referenced when creating a JSONB + * value. When examining an existing value, pay attention to the HAS_OFF + * bits instead. This allows changes in the offset-placement heuristic + * without breaking on-disk compatibility. */ -#define JBE_ENDPOS(je_) ((je_) & JENTRY_POSMASK) -#define JBE_OFF(ja, i) ((i) == 0 ? 0 : JBE_ENDPOS((ja)[i - 1])) -#define JBE_LEN(ja, i) ((i) == 0 ? JBE_ENDPOS((ja)[i]) \ - : JBE_ENDPOS((ja)[i]) - JBE_ENDPOS((ja)[i - 1])) +#define JB_OFFSET_STRIDE 32 /* * A jsonb array or object node, within a Jsonb Datum. * - * An array has one child for each element. An object has two children for - * each key/value pair. + * An array has one child for each element, stored in array order. + * + * An object has two children for each key/value pair. The keys all appear + * first, in key sort order; then the values appear, in an order matching the + * key order. This arrangement keeps the keys compact in memory, making a + * search for a particular key more cache-friendly. */ typedef struct JsonbContainer { @@ -172,8 +200,8 @@ typedef struct JsonbContainer } JsonbContainer; /* flags for the header-field in JsonbContainer */ -#define JB_CMASK 0x0FFFFFFF -#define JB_FSCALAR 0x10000000 +#define JB_CMASK 0x0FFFFFFF /* mask for count field */ +#define JB_FSCALAR 0x10000000 /* flag bits */ #define JB_FOBJECT 0x20000000 #define JB_FARRAY 0x40000000 @@ -248,18 +276,20 @@ struct JsonbValue (jsonbval)->type <= jbvBool) /* - * Pair within an Object. + * Key/value pair within an Object. * - * Pairs with duplicate keys are de-duplicated. We store the order for the - * benefit of doing so in a well-defined way with respect to the original - * observed order (which is "last observed wins"). This is only used briefly - * when originally constructing a Jsonb. + * This struct type is only used briefly while constructing a Jsonb; it is + * *not* the on-disk representation. + * + * Pairs with duplicate keys are de-duplicated. We store the originally + * observed pair ordering for the purpose of removing duplicates in a + * well-defined way (which is "last observed wins"). */ struct JsonbPair { JsonbValue key; /* Must be a jbvString */ JsonbValue value; /* May be of any type */ - uint32 order; /* preserves order of pairs with equal keys */ + uint32 order; /* Pair's index in original sequence */ }; /* Conversion state used when parsing Jsonb from text, or for type coercion */ @@ -287,20 +317,25 @@ typedef struct JsonbIterator { /* Container being iterated */ JsonbContainer *container; - uint32 nElems; /* Number of elements in children array (will be - * nPairs for objects) */ + uint32 nElems; /* Number of elements in children array (will + * be nPairs for objects) */ bool isScalar; /* Pseudo-array scalar value? */ - JEntry *children; + JEntry *children; /* JEntrys for child nodes */ + /* Data proper. This points to the beginning of the variable-length data */ + char *dataProper; - /* Current item in buffer (up to nElems, but must * 2 for objects) */ - int i; + /* Current item in buffer (up to nElems) */ + int curIndex; + + /* Data offset corresponding to current item */ + uint32 curDataOffset; /* - * Data proper. This points just past end of children array. - * We use the JBE_OFF() macro on the Jentrys to find offsets of each - * child in this area. + * If the container is an object, we want to return keys and values + * alternately; so curDataOffset points to the current key, and + * curValueOffset points to the current value. */ - char *dataProper; + uint32 curValueOffset; /* Private state */ JsonbIterState state; @@ -344,6 +379,8 @@ extern Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS); extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS); /* Support functions */ +extern uint32 getJsonbOffset(const JsonbContainer *jc, int index); +extern uint32 getJsonbLength(const JsonbContainer *jc, int index); extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b); extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader, uint32 flags, |