aboutsummaryrefslogtreecommitdiff
path: root/src/include/utils/jsonb.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/utils/jsonb.h')
-rw-r--r--src/include/utils/jsonb.h213
1 files changed, 124 insertions, 89 deletions
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index dea64ad7805..fc746c8b742 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -16,60 +16,18 @@
#include "utils/array.h"
#include "utils/numeric.h"
-/*
- * JB_CMASK is used to extract count of items
- *
- * It's not possible to get more than 2^28 items into an Jsonb.
- */
-#define JB_CMASK 0x0FFFFFFF
-
-#define JB_FSCALAR 0x10000000
-#define JB_FOBJECT 0x20000000
-#define JB_FARRAY 0x40000000
-
-/* Get information on varlena Jsonb */
-#define JB_ROOT_COUNT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_CMASK)
-#define JB_ROOT_IS_SCALAR(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FSCALAR)
-#define JB_ROOT_IS_OBJECT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FOBJECT)
-#define JB_ROOT_IS_ARRAY(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FARRAY)
-
-/* Jentry macros */
-#define JENTRY_POSMASK 0x0FFFFFFF
-#define JENTRY_ISFIRST 0x80000000
-#define JENTRY_TYPEMASK (~(JENTRY_POSMASK | JENTRY_ISFIRST))
-#define JENTRY_ISSTRING 0x00000000
-#define JENTRY_ISNUMERIC 0x10000000
-#define JENTRY_ISNEST 0x20000000
-#define JENTRY_ISNULL 0x40000000
-#define JENTRY_ISBOOL (JENTRY_ISNUMERIC | JENTRY_ISNEST)
-#define JENTRY_ISFALSE JENTRY_ISBOOL
-#define JENTRY_ISTRUE (JENTRY_ISBOOL | 0x40000000)
-/* Note possible multiple evaluations, also access to prior array element */
-#define JBE_ISFIRST(je_) (((je_).header & JENTRY_ISFIRST) != 0)
-#define JBE_ISSTRING(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
-#define JBE_ISNUMERIC(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
-#define JBE_ISNEST(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISNEST)
-#define JBE_ISNULL(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISNULL)
-#define JBE_ISBOOL(je_) (((je_).header & JENTRY_TYPEMASK & JENTRY_ISBOOL) == JENTRY_ISBOOL)
-#define JBE_ISBOOL_TRUE(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISTRUE)
-#define JBE_ISBOOL_FALSE(je_) (JBE_ISBOOL(je_) && !JBE_ISBOOL_TRUE(je_))
-
-/* Get offset for Jentry */
-#define JBE_ENDPOS(je_) ((je_).header & JENTRY_POSMASK)
-#define JBE_OFF(je_) (JBE_ISFIRST(je_) ? 0 : JBE_ENDPOS((&(je_))[-1]))
-#define JBE_LEN(je_) (JBE_ISFIRST(je_) ? \
- JBE_ENDPOS(je_) \
- : JBE_ENDPOS(je_) - JBE_ENDPOS((&(je_))[-1]))
-
-/* Flags indicating a stage of sequential Jsonb processing */
-#define WJB_DONE 0x000
-#define WJB_KEY 0x001
-#define WJB_VALUE 0x002
-#define WJB_ELEM 0x004
-#define WJB_BEGIN_ARRAY 0x008
-#define WJB_END_ARRAY 0x010
-#define WJB_BEGIN_OBJECT 0x020
-#define WJB_END_OBJECT 0x040
+/* Tokens used when sequentially processing a jsonb value */
+typedef enum
+{
+ WJB_DONE,
+ WJB_KEY,
+ WJB_VALUE,
+ WJB_ELEM,
+ WJB_BEGIN_ARRAY,
+ WJB_END_ARRAY,
+ WJB_BEGIN_OBJECT,
+ WJB_END_OBJECT
+} JsonbIteratorToken;
/*
* When using a GIN index for jsonb, we choose to index both keys and values.
@@ -98,7 +56,6 @@
typedef struct JsonbPair JsonbPair;
typedef struct JsonbValue JsonbValue;
-typedef char *JsonbSuperHeader;
/*
* Jsonbs are varlena objects, so must meet the varlena convention that the
@@ -109,35 +66,115 @@ typedef char *JsonbSuperHeader;
* representation. Often, JsonbValues are just shims through which a Jsonb
* buffer is accessed, but they can also be deep copied and passed around.
*
- * We have an abstraction called a "superheader". This is a pointer that
- * conventionally points to the first item after our 4-byte uncompressed
- * varlena header, from which we can read flags using bitwise operations.
+ * Jsonb is a tree structure. Each node in the tree consists of a JEntry
+ * header, and a variable-length content. The JEntry header indicates what
+ * kind of a node it is, e.g. a string or an array, and the offset and length
+ * of its variable-length portion within the container.
*
- * Frequently, we pass a superheader reference to a function, and it doesn't
- * matter if it points to just after the start of a Jsonb, or to a temp buffer.
+ * The JEntry and the content of a node are not stored physically together.
+ * Instead, the container array or object has an array that holds the JEntrys
+ * of all the child nodes, followed by their variable-length portions.
+ *
+ * The root node is an exception; it has no parent array or object that could
+ * hold its JEntry. Hence, no JEntry header is stored for the root node. It
+ * is implicitly known that the root node must be an array or an object,
+ * so we can get away without the type indicator as long as we can distinguish
+ * the two. For that purpose, both an array and an object begin with a uint32
+ * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked
+ * scalar value needs to be stored as a Jsonb value, what we actually store is
+ * an array with one element, with the flags in the array's header field set
+ * to JB_FSCALAR | JB_FARRAY.
+ *
+ * To encode the length and offset of the variable-length portion of each
+ * node in a compact way, the JEntry stores only the end offset within the
+ * variable-length portion of the container node. For the first JEntry in the
+ * container's JEntry array, that equals the length of the node data. For
+ * convenience, the JENTRY_ISFIRST flag is set. The begin offset and length
+ * of the rest of the entries can be calculated using the end offset of the
+ * previous JEntry in the array.
+ *
+ * Overall, the Jsonb struct requires 4-byte alignment. Within the struct,
+ * the variable-length portion of some node types is aligned to a 4-byte
+ * boundary, while others are not. When alignment is needed, the padding is
+ * at the beginning of the node that requires it. For example, if a numeric
+ * node is stored after a string node, so that the numeric node begins at
+ * offset 3, the variable-length portion of the numeric node will begin with
+ * one padding byte.
*/
-typedef struct
-{
- int32 vl_len_; /* varlena header (do not touch directly!) */
- uint32 superheader;
- /* (array of JEntry follows, size determined using uint32 superheader) */
-} Jsonb;
/*
- * JEntry: there is one of these for each key _and_ value for objects. Arrays
- * have one per element.
+ * JEntry format.
+ *
+ * The least significant 28 bits store the end offset of the entry (see
+ * JBE_ENDPOS, JBE_OFF, JBE_LEN macros below). The next three bits
+ * are used to store the type of the entry. The most significant bit
+ * is set on the first entry in an array of JEntrys.
+ */
+typedef uint32 JEntry;
+
+#define JENTRY_POSMASK 0x0FFFFFFF
+#define JENTRY_TYPEMASK 0x70000000
+#define JENTRY_ISFIRST 0x80000000
+
+/* values stored in the type bits */
+#define JENTRY_ISSTRING 0x00000000
+#define JENTRY_ISNUMERIC 0x10000000
+#define JENTRY_ISCONTAINER 0x20000000 /* array or object */
+#define JENTRY_ISBOOL_FALSE 0x30000000
+#define JENTRY_ISNULL 0x40000000
+#define JENTRY_ISBOOL_TRUE 0x70000000
+
+/* Note possible multiple evaluations, also access to prior array element */
+#define JBE_ISFIRST(je_) (((je_) & JENTRY_ISFIRST) != 0)
+#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
+#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
+#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
+#define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
+#define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
+#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
+#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
+
+/* Get offset for JEntry */
+#define JBE_ENDPOS(je_) ((je_) & JENTRY_POSMASK)
+#define JBE_OFF(je_) (JBE_ISFIRST(je_) ? 0 : JBE_ENDPOS((&(je_))[-1]))
+#define JBE_LEN(je_) (JBE_ISFIRST(je_) ? \
+ JBE_ENDPOS(je_) \
+ : JBE_ENDPOS(je_) - JBE_ENDPOS((&(je_))[-1]))
+
+/*
+ * A jsonb array or object node, within a Jsonb Datum.
*
- * The position offset points to the _end_ so that we can get the length by
- * subtraction from the previous entry. The JENTRY_ISFIRST flag indicates if
- * there is a previous entry.
+ * An array has one child for each element. An object has two children for
+ * each key/value pair.
*/
+typedef struct JsonbContainer
+{
+ uint32 header; /* number of elements or key/value pairs, and
+ * flags */
+ JEntry children[1]; /* variable length */
+
+ /* the data for each child node follows. */
+} JsonbContainer;
+
+/* flags for the header-field in JsonbContainer */
+#define JB_CMASK 0x0FFFFFFF
+#define JB_FSCALAR 0x10000000
+#define JB_FOBJECT 0x20000000
+#define JB_FARRAY 0x40000000
+
+/* The top-level on-disk format for a jsonb datum. */
typedef struct
{
- uint32 header; /* Shares some flags with superheader */
-} JEntry;
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ JsonbContainer root;
+} Jsonb;
+
+/* convenience macros for accessing the root container in a Jsonb datum */
+#define JB_ROOT_COUNT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_CMASK)
+#define JB_ROOT_IS_SCALAR(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FSCALAR)
+#define JB_ROOT_IS_OBJECT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FOBJECT)
+#define JB_ROOT_IS_ARRAY(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FARRAY)
-#define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \
- (jsonbval)->type <= jbvBool)
/*
* JsonbValue: In-memory representation of Jsonb. This is a convenient
@@ -161,8 +198,6 @@ struct JsonbValue
jbvBinary
} type; /* Influences sort order */
- int estSize; /* Estimated size of node (including subnodes) */
-
union
{
Numeric numeric;
@@ -189,11 +224,14 @@ struct JsonbValue
struct
{
int len;
- char *data;
+ JsonbContainer *data;
} binary;
} val;
};
+#define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \
+ (jsonbval)->type <= jbvBool)
+
/*
* Pair within an Object.
*
@@ -294,27 +332,24 @@ extern Datum gin_consistent_jsonb_hash(PG_FUNCTION_ARGS);
extern Datum gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS);
/* Support functions */
-extern int compareJsonbSuperHeaderValue(JsonbSuperHeader a,
- JsonbSuperHeader b);
-extern JsonbValue *findJsonbValueFromSuperHeader(JsonbSuperHeader sheader,
+extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
+extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
uint32 flags,
- uint32 *lowbound,
JsonbValue *key);
-extern JsonbValue *getIthJsonbValueFromSuperHeader(JsonbSuperHeader sheader,
+extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
uint32 i);
-extern JsonbValue *pushJsonbValue(JsonbParseState **pstate, int seq,
- JsonbValue *scalarVal);
-extern JsonbIterator *JsonbIteratorInit(JsonbSuperHeader buffer);
-extern int JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
+extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
+ JsonbIteratorToken seq, JsonbValue *scalarVal);
+extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
+extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
bool skipNested);
extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
extern bool JsonbDeepContains(JsonbIterator **val,
JsonbIterator **mContained);
-extern JsonbValue *arrayToJsonbSortedArray(ArrayType *a);
extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
/* jsonb.c support function */
-extern char *JsonbToCString(StringInfo out, JsonbSuperHeader in,
+extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
int estimated_len);
#endif /* __JSONB_H__ */