aboutsummaryrefslogtreecommitdiff
path: root/src/include/utils/jsonb.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/utils/jsonb.h')
-rw-r--r--src/include/utils/jsonb.h125
1 files changed, 81 insertions, 44 deletions
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 91e3e140b4d..b89e4cb92cc 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -83,9 +83,9 @@ typedef struct JsonbValue JsonbValue;
* buffer is accessed, but they can also be deep copied and passed around.
*
* Jsonb is a tree structure. Each node in the tree consists of a JEntry
- * header, and a variable-length content. The JEntry header indicates what
- * kind of a node it is, e.g. a string or an array, and the offset and length
- * of its variable-length portion within the container.
+ * header and a variable-length content (possibly of zero size). The JEntry
+ * header indicates what kind of a node it is, e.g. a string or an array,
+ * and provides the length of its variable-length portion.
*
* The JEntry and the content of a node are not stored physically together.
* Instead, the container array or object has an array that holds the JEntrys
@@ -95,40 +95,52 @@ typedef struct JsonbValue JsonbValue;
* hold its JEntry. Hence, no JEntry header is stored for the root node. It
* is implicitly known that the root node must be an array or an object,
* so we can get away without the type indicator as long as we can distinguish
- * the two. For that purpose, both an array and an object begins with a uint32
+ * the two. For that purpose, both an array and an object begin with a uint32
* header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked
* scalar value needs to be stored as a Jsonb value, what we actually store is
* an array with one element, with the flags in the array's header field set
* to JB_FSCALAR | JB_FARRAY.
*
- * To encode the length and offset of the variable-length portion of each
- * node in a compact way, the JEntry stores only the end offset within the
- * variable-length portion of the container node. For the first JEntry in the
- * container's JEntry array, that equals to the length of the node data. The
- * begin offset and length of the rest of the entries can be calculated using
- * the end offset of the previous JEntry in the array.
- *
* Overall, the Jsonb struct requires 4-bytes alignment. Within the struct,
* the variable-length portion of some node types is aligned to a 4-byte
* boundary, while others are not. When alignment is needed, the padding is
* in the beginning of the node that requires it. For example, if a numeric
* node is stored after a string node, so that the numeric node begins at
* offset 3, the variable-length portion of the numeric node will begin with
- * one padding byte.
+ * one padding byte so that the actual numeric data is 4-byte aligned.
*/
/*
- * Jentry format.
+ * JEntry format.
+ *
+ * The least significant 28 bits store either the data length of the entry,
+ * or its end+1 offset from the start of the variable-length portion of the
+ * containing object. The next three bits store the type of the entry, and
+ * the high-order bit tells whether the least significant bits store a length
+ * or an offset.
+ *
+ * The reason for the offset-or-length complication is to compromise between
+ * access speed and data compressibility. In the initial design each JEntry
+ * always stored an offset, but this resulted in JEntry arrays with horrible
+ * compressibility properties, so that TOAST compression of a JSONB did not
+ * work well. Storing only lengths would greatly improve compressibility,
+ * but it makes random access into large arrays expensive (O(N) not O(1)).
+ * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
+ * a length in the rest. This results in reasonably compressible data (as
+ * long as the stride isn't too small). We may have to examine as many as
+ * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
+ * given item, but that's still O(1) no matter how large the container is.
*
- * The least significant 28 bits store the end offset of the entry (see
- * JBE_ENDPOS, JBE_OFF, JBE_LEN macros below). The next three bits
- * are used to store the type of the entry. The most significant bit
- * is unused, and should be set to zero.
+ * We could avoid eating a flag bit for this purpose if we were to store
+ * the stride in the container header, or if we were willing to treat the
+ * stride as an unchangeable constant. Neither of those options is very
+ * attractive though.
*/
typedef uint32 JEntry;
-#define JENTRY_POSMASK 0x0FFFFFFF
+#define JENTRY_OFFLENMASK 0x0FFFFFFF
#define JENTRY_TYPEMASK 0x70000000
+#define JENTRY_HAS_OFF 0x80000000
/* values stored in the type bits */
#define JENTRY_ISSTRING 0x00000000
@@ -138,7 +150,9 @@ typedef uint32 JEntry;
#define JENTRY_ISNULL 0x40000000
#define JENTRY_ISCONTAINER 0x50000000 /* array or object */
-/* Note possible multiple evaluations */
+/* Access macros. Note possible multiple evaluations */
+#define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK)
+#define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0)
#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
@@ -147,20 +161,34 @@ typedef uint32 JEntry;
#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
+/* Macro for advancing an offset variable to the next JEntry */
+#define JBE_ADVANCE_OFFSET(offset, je) \
+ do { \
+ JEntry je_ = (je); \
+ if (JBE_HAS_OFF(je_)) \
+ (offset) = JBE_OFFLENFLD(je_); \
+ else \
+ (offset) += JBE_OFFLENFLD(je_); \
+ } while(0)
+
/*
- * Macros for getting the offset and length of an element. Note multiple
- * evaluations and access to prior array element.
+ * We store an offset, not a length, every JB_OFFSET_STRIDE children.
+ * Caution: this macro should only be referenced when creating a JSONB
+ * value. When examining an existing value, pay attention to the HAS_OFF
+ * bits instead. This allows changes in the offset-placement heuristic
+ * without breaking on-disk compatibility.
*/
-#define JBE_ENDPOS(je_) ((je_) & JENTRY_POSMASK)
-#define JBE_OFF(ja, i) ((i) == 0 ? 0 : JBE_ENDPOS((ja)[i - 1]))
-#define JBE_LEN(ja, i) ((i) == 0 ? JBE_ENDPOS((ja)[i]) \
- : JBE_ENDPOS((ja)[i]) - JBE_ENDPOS((ja)[i - 1]))
+#define JB_OFFSET_STRIDE 32
/*
* A jsonb array or object node, within a Jsonb Datum.
*
- * An array has one child for each element. An object has two children for
- * each key/value pair.
+ * An array has one child for each element, stored in array order.
+ *
+ * An object has two children for each key/value pair. The keys all appear
+ * first, in key sort order; then the values appear, in an order matching the
+ * key order. This arrangement keeps the keys compact in memory, making a
+ * search for a particular key more cache-friendly.
*/
typedef struct JsonbContainer
{
@@ -172,8 +200,8 @@ typedef struct JsonbContainer
} JsonbContainer;
/* flags for the header-field in JsonbContainer */
-#define JB_CMASK 0x0FFFFFFF
-#define JB_FSCALAR 0x10000000
+#define JB_CMASK 0x0FFFFFFF /* mask for count field */
+#define JB_FSCALAR 0x10000000 /* flag bits */
#define JB_FOBJECT 0x20000000
#define JB_FARRAY 0x40000000
@@ -248,18 +276,20 @@ struct JsonbValue
(jsonbval)->type <= jbvBool)
/*
- * Pair within an Object.
+ * Key/value pair within an Object.
*
- * Pairs with duplicate keys are de-duplicated. We store the order for the
- * benefit of doing so in a well-defined way with respect to the original
- * observed order (which is "last observed wins"). This is only used briefly
- * when originally constructing a Jsonb.
+ * This struct type is only used briefly while constructing a Jsonb; it is
+ * *not* the on-disk representation.
+ *
+ * Pairs with duplicate keys are de-duplicated. We store the originally
+ * observed pair ordering for the purpose of removing duplicates in a
+ * well-defined way (which is "last observed wins").
*/
struct JsonbPair
{
JsonbValue key; /* Must be a jbvString */
JsonbValue value; /* May be of any type */
- uint32 order; /* preserves order of pairs with equal keys */
+ uint32 order; /* Pair's index in original sequence */
};
/* Conversion state used when parsing Jsonb from text, or for type coercion */
@@ -287,20 +317,25 @@ typedef struct JsonbIterator
{
/* Container being iterated */
JsonbContainer *container;
- uint32 nElems; /* Number of elements in children array (will be
- * nPairs for objects) */
+ uint32 nElems; /* Number of elements in children array (will
+ * be nPairs for objects) */
bool isScalar; /* Pseudo-array scalar value? */
- JEntry *children;
+ JEntry *children; /* JEntrys for child nodes */
+ /* Data proper. This points to the beginning of the variable-length data */
+ char *dataProper;
- /* Current item in buffer (up to nElems, but must * 2 for objects) */
- int i;
+ /* Current item in buffer (up to nElems) */
+ int curIndex;
+
+ /* Data offset corresponding to current item */
+ uint32 curDataOffset;
/*
- * Data proper. This points just past end of children array.
- * We use the JBE_OFF() macro on the Jentrys to find offsets of each
- * child in this area.
+ * If the container is an object, we want to return keys and values
+ * alternately; so curDataOffset points to the current key, and
+ * curValueOffset points to the current value.
*/
- char *dataProper;
+ uint32 curValueOffset;
/* Private state */
JsonbIterState state;
@@ -344,6 +379,8 @@ extern Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS);
extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS);
/* Support functions */
+extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
+extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
uint32 flags,