diff options
Diffstat (limited to 'src/include/utils/jsonb.h')
-rw-r--r-- | src/include/utils/jsonb.h | 213 |
1 files changed, 124 insertions, 89 deletions
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h index dea64ad7805..fc746c8b742 100644 --- a/src/include/utils/jsonb.h +++ b/src/include/utils/jsonb.h @@ -16,60 +16,18 @@ #include "utils/array.h" #include "utils/numeric.h" -/* - * JB_CMASK is used to extract count of items - * - * It's not possible to get more than 2^28 items into an Jsonb. - */ -#define JB_CMASK 0x0FFFFFFF - -#define JB_FSCALAR 0x10000000 -#define JB_FOBJECT 0x20000000 -#define JB_FARRAY 0x40000000 - -/* Get information on varlena Jsonb */ -#define JB_ROOT_COUNT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_CMASK) -#define JB_ROOT_IS_SCALAR(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FSCALAR) -#define JB_ROOT_IS_OBJECT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FOBJECT) -#define JB_ROOT_IS_ARRAY(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FARRAY) - -/* Jentry macros */ -#define JENTRY_POSMASK 0x0FFFFFFF -#define JENTRY_ISFIRST 0x80000000 -#define JENTRY_TYPEMASK (~(JENTRY_POSMASK | JENTRY_ISFIRST)) -#define JENTRY_ISSTRING 0x00000000 -#define JENTRY_ISNUMERIC 0x10000000 -#define JENTRY_ISNEST 0x20000000 -#define JENTRY_ISNULL 0x40000000 -#define JENTRY_ISBOOL (JENTRY_ISNUMERIC | JENTRY_ISNEST) -#define JENTRY_ISFALSE JENTRY_ISBOOL -#define JENTRY_ISTRUE (JENTRY_ISBOOL | 0x40000000) -/* Note possible multiple evaluations, also access to prior array element */ -#define JBE_ISFIRST(je_) (((je_).header & JENTRY_ISFIRST) != 0) -#define JBE_ISSTRING(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISSTRING) -#define JBE_ISNUMERIC(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC) -#define JBE_ISNEST(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISNEST) -#define JBE_ISNULL(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISNULL) -#define JBE_ISBOOL(je_) (((je_).header & JENTRY_TYPEMASK & JENTRY_ISBOOL) == JENTRY_ISBOOL) -#define JBE_ISBOOL_TRUE(je_) (((je_).header & JENTRY_TYPEMASK) == JENTRY_ISTRUE) -#define JBE_ISBOOL_FALSE(je_) (JBE_ISBOOL(je_) && !JBE_ISBOOL_TRUE(je_)) - -/* Get offset for 
Jentry */ -#define JBE_ENDPOS(je_) ((je_).header & JENTRY_POSMASK) -#define JBE_OFF(je_) (JBE_ISFIRST(je_) ? 0 : JBE_ENDPOS((&(je_))[-1])) -#define JBE_LEN(je_) (JBE_ISFIRST(je_) ? \ - JBE_ENDPOS(je_) \ - : JBE_ENDPOS(je_) - JBE_ENDPOS((&(je_))[-1])) - -/* Flags indicating a stage of sequential Jsonb processing */ -#define WJB_DONE 0x000 -#define WJB_KEY 0x001 -#define WJB_VALUE 0x002 -#define WJB_ELEM 0x004 -#define WJB_BEGIN_ARRAY 0x008 -#define WJB_END_ARRAY 0x010 -#define WJB_BEGIN_OBJECT 0x020 -#define WJB_END_OBJECT 0x040 +/* Tokens used when sequentially processing a jsonb value */ +typedef enum +{ + WJB_DONE, + WJB_KEY, + WJB_VALUE, + WJB_ELEM, + WJB_BEGIN_ARRAY, + WJB_END_ARRAY, + WJB_BEGIN_OBJECT, + WJB_END_OBJECT +} JsonbIteratorToken; /* * When using a GIN index for jsonb, we choose to index both keys and values. @@ -98,7 +56,6 @@ typedef struct JsonbPair JsonbPair; typedef struct JsonbValue JsonbValue; -typedef char *JsonbSuperHeader; /* * Jsonbs are varlena objects, so must meet the varlena convention that the @@ -109,35 +66,115 @@ typedef char *JsonbSuperHeader; * representation. Often, JsonbValues are just shims through which a Jsonb * buffer is accessed, but they can also be deep copied and passed around. * - * We have an abstraction called a "superheader". This is a pointer that - * conventionally points to the first item after our 4-byte uncompressed - * varlena header, from which we can read flags using bitwise operations. + * Jsonb is a tree structure. Each node in the tree consists of a JEntry + * header, and a variable-length content. The JEntry header indicates what + * kind of a node it is, e.g. a string or an array, and the offset and length + * of its variable-length portion within the container. * - * Frequently, we pass a superheader reference to a function, and it doesn't - * matter if it points to just after the start of a Jsonb, or to a temp buffer. + * The JEntry and the content of a node are not stored physically together. 
+ * Instead, the container array or object has an array that holds the JEntrys + * of all the child nodes, followed by their variable-length portions. + * + * The root node is an exception; it has no parent array or object that could + * hold its JEntry. Hence, no JEntry header is stored for the root node. It + * is implicitly known that the root node must be an array or an object, + * so we can get away without the type indicator as long as we can distinguish + * the two. For that purpose, both an array and an object begin with a uint32 + * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked + * scalar value needs to be stored as a Jsonb value, what we actually store is + * an array with one element, with the flags in the array's header field set + * to JB_FSCALAR | JB_FARRAY. + * + * To encode the length and offset of the variable-length portion of each + * node in a compact way, the JEntry stores only the end offset within the + * variable-length portion of the container node. For the first JEntry in the + * container's JEntry array, that equals the length of the node data. For + * convenience, the JENTRY_ISFIRST flag is set. The begin offset and length + * of the rest of the entries can be calculated using the end offset of the + * previous JEntry in the array. + * + * Overall, the Jsonb struct requires 4-byte alignment. Within the struct, + * the variable-length portion of some node types is aligned to a 4-byte + * boundary, while others are not. When alignment is needed, the padding is + * at the beginning of the node that requires it. For example, if a numeric + * node is stored after a string node, so that the numeric node begins at + * offset 3, the variable-length portion of the numeric node will begin with + * one padding byte. */ -typedef struct -{ - int32 vl_len_; /* varlena header (do not touch directly!) 
*/ - uint32 superheader; - /* (array of JEntry follows, size determined using uint32 superheader) */ -} Jsonb; /* - * JEntry: there is one of these for each key _and_ value for objects. Arrays - * have one per element. + * Jentry format. + * + * The least significant 28 bits store the end offset of the entry (see + * JBE_ENDPOS, JBE_OFF, JBE_LEN macros below). The next three bits + * are used to store the type of the entry. The most significant bit + * is set on the first entry in an array of JEntrys. + */ +typedef uint32 JEntry; + +#define JENTRY_POSMASK 0x0FFFFFFF +#define JENTRY_TYPEMASK 0x70000000 +#define JENTRY_ISFIRST 0x80000000 + +/* values stored in the type bits */ +#define JENTRY_ISSTRING 0x00000000 +#define JENTRY_ISNUMERIC 0x10000000 +#define JENTRY_ISCONTAINER 0x20000000 /* array or object */ +#define JENTRY_ISBOOL_FALSE 0x30000000 +#define JENTRY_ISNULL 0x40000000 +#define JENTRY_ISBOOL_TRUE 0x70000000 + +/* Note possible multiple evaluations, also access to prior array element */ +#define JBE_ISFIRST(je_) (((je_) & JENTRY_ISFIRST) != 0) +#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING) +#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC) +#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER) +#define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL) +#define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE) +#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE) +#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_)) + +/* Get offset for Jentry */ +#define JBE_ENDPOS(je_) ((je_) & JENTRY_POSMASK) +#define JBE_OFF(je_) (JBE_ISFIRST(je_) ? 0 : JBE_ENDPOS((&(je_))[-1])) +#define JBE_LEN(je_) (JBE_ISFIRST(je_) ? \ + JBE_ENDPOS(je_) \ + : JBE_ENDPOS(je_) - JBE_ENDPOS((&(je_))[-1])) + +/* + * A jsonb array or object node, within a Jsonb Datum. 
* - * The position offset points to the _end_ so that we can get the length by - * subtraction from the previous entry. The JENTRY_ISFIRST flag indicates if - * there is a previous entry. + * An array has one child for each element. An object has two children for + * each key/value pair. */ +typedef struct JsonbContainer +{ + uint32 header; /* number of elements or key/value pairs, and + * flags */ + JEntry children[1]; /* variable length */ + + /* the data for each child node follows. */ +} JsonbContainer; + +/* flags for the header-field in JsonbContainer */ +#define JB_CMASK 0x0FFFFFFF +#define JB_FSCALAR 0x10000000 +#define JB_FOBJECT 0x20000000 +#define JB_FARRAY 0x40000000 + +/* The top-level on-disk format for a jsonb datum. */ typedef struct { - uint32 header; /* Shares some flags with superheader */ -} JEntry; + int32 vl_len_; /* varlena header (do not touch directly!) */ + JsonbContainer root; +} Jsonb; + +/* convenience macros for accessing the root container in a Jsonb datum */ +#define JB_ROOT_COUNT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_CMASK) +#define JB_ROOT_IS_SCALAR(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FSCALAR) +#define JB_ROOT_IS_OBJECT(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FOBJECT) +#define JB_ROOT_IS_ARRAY(jbp_) ( *(uint32*) VARDATA(jbp_) & JB_FARRAY) -#define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \ - (jsonbval)->type <= jbvBool) /* * JsonbValue: In-memory representation of Jsonb. This is a convenient @@ -161,8 +198,6 @@ struct JsonbValue jbvBinary } type; /* Influences sort order */ - int estSize; /* Estimated size of node (including subnodes) */ - union { Numeric numeric; @@ -189,11 +224,14 @@ struct JsonbValue struct { int len; - char *data; + JsonbContainer *data; } binary; } val; }; +#define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \ + (jsonbval)->type <= jbvBool) + /* * Pair within an Object. 
* @@ -294,27 +332,24 @@ extern Datum gin_consistent_jsonb_hash(PG_FUNCTION_ARGS); extern Datum gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS); /* Support functions */ -extern int compareJsonbSuperHeaderValue(JsonbSuperHeader a, - JsonbSuperHeader b); -extern JsonbValue *findJsonbValueFromSuperHeader(JsonbSuperHeader sheader, +extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b); +extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader, uint32 flags, - uint32 *lowbound, JsonbValue *key); -extern JsonbValue *getIthJsonbValueFromSuperHeader(JsonbSuperHeader sheader, +extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader, uint32 i); -extern JsonbValue *pushJsonbValue(JsonbParseState **pstate, int seq, - JsonbValue *scalarVal); -extern JsonbIterator *JsonbIteratorInit(JsonbSuperHeader buffer); -extern int JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, +extern JsonbValue *pushJsonbValue(JsonbParseState **pstate, + JsonbIteratorToken seq, JsonbValue *scalarVal); +extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container); +extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested); extern Jsonb *JsonbValueToJsonb(JsonbValue *val); extern bool JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained); -extern JsonbValue *arrayToJsonbSortedArray(ArrayType *a); extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash); /* jsonb.c support function */ -extern char *JsonbToCString(StringInfo out, JsonbSuperHeader in, +extern char *JsonbToCString(StringInfo out, JsonbContainer *in, int estimated_len); #endif /* __JSONB_H__ */ |