diff options
author | Andrew Dunstan <andrew@dunslane.net> | 2014-03-23 16:40:19 -0400 |
---|---|---|
committer | Andrew Dunstan <andrew@dunslane.net> | 2014-03-23 16:40:19 -0400 |
commit | d9134d0a355cfa447adc80db4505d5931084278a (patch) | |
tree | cefe155d0f0f71b9279444a86eab4b1b1facafdb /src/backend | |
parent | b2b2491b06074e68fc7c96148cb0fdf0c8eb0469 (diff) | |
download | postgresql-d9134d0a355cfa447adc80db4505d5931084278a.tar.gz postgresql-d9134d0a355cfa447adc80db4505d5931084278a.zip |
Introduce jsonb, a structured format for storing json.
The new format accepts exactly the same data as the json type. However, it is
stored in a format that does not require reparsing the original text in order
to process it, making it much more suitable for indexing and other operations.
Insignificant whitespace is discarded, and the order of object keys is not
preserved. Neither are duplicate object keys kept - the later value for a given
key is the only one stored.
The new type has all the functions and operators that the json type has,
with the exception of the json generation functions (to_json, json_agg etc.)
and with identical semantics. In addition, there are operator classes for
hash and btree indexing, and two classes for GIN indexing, that have no
equivalent in the json type.
This feature grew out of previous work by Oleg Bartunov and Teodor Sigaev, which
was intended to provide similar facilities to a nested hstore type, but which
in the end proved to have some significant compatibility issues.
Authors: Oleg Bartunov, Teodor Sigaev, Peter Geoghegan and Andrew Dunstan.
Review: Andres Freund
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/catalog/system_views.sql | 8 | ||||
-rw-r--r-- | src/backend/utils/adt/Makefile | 10 | ||||
-rw-r--r-- | src/backend/utils/adt/json.c | 42 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonb.c | 468 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonb_gin.c | 646 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonb_op.c | 295 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonb_util.c | 1872 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonfuncs.c | 1151 | ||||
-rw-r--r-- | src/backend/utils/adt/numeric.c | 38 |
9 files changed, 4418 insertions, 112 deletions
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 053d7585ca4..662040261e9 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -825,6 +825,14 @@ CREATE OR REPLACE FUNCTION json_populate_recordset(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) RETURNS SETOF anyelement LANGUAGE internal STABLE ROWS 100 AS 'json_populate_recordset'; +CREATE OR REPLACE FUNCTION + jsonb_populate_record(base anyelement, from_json jsonb, use_json_as_text boolean DEFAULT false) + RETURNS anyelement LANGUAGE internal STABLE AS 'jsonb_populate_record'; + +CREATE OR REPLACE FUNCTION + jsonb_populate_recordset(base anyelement, from_json jsonb, use_json_as_text boolean DEFAULT false) + RETURNS SETOF anyelement LANGUAGE internal STABLE ROWS 100 AS 'jsonb_populate_recordset'; + CREATE OR REPLACE FUNCTION pg_logical_slot_get_changes( IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}', OUT location pg_lsn, OUT xid xid, OUT data text) diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 644687954b2..6b23069e26c 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -21,11 +21,11 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ cash.o char.o date.o datetime.o datum.o dbsize.o domains.o \ encode.o enum.o float.o format_type.o formatting.o genfile.o \ geo_ops.o geo_selfuncs.o inet_cidr_ntop.o inet_net_pton.o int.o \ - int8.o json.o jsonfuncs.o like.o \ - lockfuncs.o mac.o misc.o nabstime.o name.o network.o numeric.o \ - numutils.o oid.o oracle_compat.o orderedsetaggs.o \ - pg_lzcompress.o pg_locale.o pg_lsn.o pgstatfuncs.o \ - pseudotypes.o quote.o rangetypes.o rangetypes_gist.o \ + int8.o json.o jsonb.o jsonb_gin.o jsonb_op.o jsonb_util.o \ + jsonfuncs.o like.o lockfuncs.o mac.o misc.o nabstime.o name.o \ + network.o numeric.o numutils.o oid.o oracle_compat.o \ + 
orderedsetaggs.o pg_lzcompress.o pg_locale.o pg_lsn.o \ + pgstatfuncs.o pseudotypes.o quote.o rangetypes.o rangetypes_gist.o \ rangetypes_selfuncs.o rangetypes_spgist.o rangetypes_typanalyze.o \ regexp.o regproc.o ri_triggers.o rowtypes.o ruleutils.o \ selfuncs.o tid.o timestamp.o trigfuncs.o \ diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 97a0e9f211e..c34a1bb50be 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -210,22 +210,17 @@ Datum json_recv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - text *result; char *str; int nbytes; JsonLexContext *lex; str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); - result = palloc(nbytes + VARHDRSZ); - SET_VARSIZE(result, nbytes + VARHDRSZ); - memcpy(VARDATA(result), str, nbytes); - /* Validate it. */ - lex = makeJsonLexContext(result, false); + lex = makeJsonLexContextCstringLen(str, nbytes, false); pg_parse_json(lex, &nullSemAction); - PG_RETURN_TEXT_P(result); + PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes)); } /* @@ -236,15 +231,26 @@ json_recv(PG_FUNCTION_ARGS) * * Without is better as it makes the processing faster, so only make one * if really required. + * + * If you already have the json as a text* value, use the first of these + * functions, otherwise use makeJsonLexContextCstringLen(). 
*/ JsonLexContext * makeJsonLexContext(text *json, bool need_escapes) { + return makeJsonLexContextCstringLen(VARDATA(json), + VARSIZE(json) - VARHDRSZ, + need_escapes); +} + +JsonLexContext * +makeJsonLexContextCstringLen(char *json, int len, bool need_escapes) +{ JsonLexContext *lex = palloc0(sizeof(JsonLexContext)); - lex->input = lex->token_terminator = lex->line_start = VARDATA(json); + lex->input = lex->token_terminator = lex->line_start = json; lex->line_number = 1; - lex->input_length = VARSIZE(json) - VARHDRSZ; + lex->input_length = len; if (need_escapes) lex->strval = makeStringInfo(); return lex; @@ -1274,7 +1280,7 @@ datum_to_json(Datum val, bool is_null, StringInfo result, pfree(outputstr); break; case TYPCATEGORY_JSON: - /* JSON will already be escaped */ + /* JSON and JSONB will already be escaped */ outputstr = OidOutputFunctionCall(typoutputfunc, val); appendStringInfoString(result, outputstr); pfree(outputstr); @@ -1406,7 +1412,7 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) tcategory = TYPCATEGORY_JSON_CAST; else if (element_type == RECORDOID) tcategory = TYPCATEGORY_COMPOSITE; - else if (element_type == JSONOID) + else if (element_type == JSONOID || element_type == JSONBOID) tcategory = TYPCATEGORY_JSON; else tcategory = TypeCategory(element_type); @@ -1501,7 +1507,8 @@ composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) tcategory = TYPCATEGORY_ARRAY; else if (tupdesc->attrs[i]->atttypid == RECORDOID) tcategory = TYPCATEGORY_COMPOSITE; - else if (tupdesc->attrs[i]->atttypid == JSONOID) + else if (tupdesc->attrs[i]->atttypid == JSONOID || + tupdesc->attrs[i]->atttypid == JSONBOID) tcategory = TYPCATEGORY_JSON; else tcategory = TypeCategory(tupdesc->attrs[i]->atttypid); @@ -1689,7 +1696,7 @@ to_json(PG_FUNCTION_ARGS) tcategory = TYPCATEGORY_ARRAY; else if (val_type == RECORDOID) tcategory = TYPCATEGORY_COMPOSITE; - else if (val_type == JSONOID) + else if (val_type == JSONOID || val_type == 
JSONBOID) tcategory = TYPCATEGORY_JSON; else tcategory = TypeCategory(val_type); @@ -1783,7 +1790,7 @@ json_agg_transfn(PG_FUNCTION_ARGS) tcategory = TYPCATEGORY_ARRAY; else if (val_type == RECORDOID) tcategory = TYPCATEGORY_COMPOSITE; - else if (val_type == JSONOID) + else if (val_type == JSONOID || val_type == JSONBOID) tcategory = TYPCATEGORY_JSON; else tcategory = TypeCategory(val_type); @@ -2346,12 +2353,15 @@ escape_json(StringInfo buf, const char *str) Datum json_typeof(PG_FUNCTION_ARGS) { - text *json = PG_GETARG_TEXT_P(0); + text *json; - JsonLexContext *lex = makeJsonLexContext(json, false); + JsonLexContext *lex; JsonTokenType tok; char *type; + json = PG_GETARG_TEXT_P(0); + lex = makeJsonLexContext(json, false); + /* Lex exactly one token from the input and check its type. */ json_lex(lex); tok = lex_peek(lex); diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c new file mode 100644 index 00000000000..b30e79e425b --- /dev/null +++ b/src/backend/utils/adt/jsonb.c @@ -0,0 +1,468 @@ +/*------------------------------------------------------------------------- + * + * jsonb.c + * I/O routines for jsonb type + * + * Copyright (c) 2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "libpq/pqformat.h" +#include "utils/builtins.h" +#include "utils/json.h" +#include "utils/jsonapi.h" +#include "utils/jsonb.h" + +typedef struct JsonbInState +{ + JsonbParseState *parseState; + JsonbValue *res; +} JsonbInState; + +static inline Datum jsonb_from_cstring(char *json, int len); +static size_t checkStringLen(size_t len); +static void jsonb_in_object_start(void *pstate); +static void jsonb_in_object_end(void *pstate); +static void jsonb_in_array_start(void *pstate); +static void jsonb_in_array_end(void *pstate); +static void jsonb_in_object_field_start(void *pstate, char *fname, bool 
isnull); +static void jsonb_put_escaped_value(StringInfo out, JsonbValue * scalarVal); +static void jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype); +char *JsonbToCString(StringInfo out, char *in, int estimated_len); + +/* + * jsonb type input function + */ +Datum +jsonb_in(PG_FUNCTION_ARGS) +{ + char *json = PG_GETARG_CSTRING(0); + + return jsonb_from_cstring(json, strlen(json)); +} + +/* + * jsonb type recv function + * + * The type is sent as text in binary mode, so this is almost the same + * as the input function, but it's prefixed with a version number so we + * can change the binary format sent in future if necessary. For now, + * only version 1 is supported. + */ +Datum +jsonb_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int version = pq_getmsgint(buf, 1); + char *str; + int nbytes; + + if (version == 1) + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + else + elog(ERROR, "Unsupported jsonb version number %d", version); + + return jsonb_from_cstring(str, nbytes); +} + +/* + * jsonb type output function + */ +Datum +jsonb_out(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + char *out; + + out = JsonbToCString(NULL, VARDATA(jb), VARSIZE(jb)); + + PG_RETURN_CSTRING(out); +} + +/* + * jsonb type send function + * + * Just send jsonb as a version number, then a string of text + */ +Datum +jsonb_send(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + StringInfoData buf; + StringInfo jtext = makeStringInfo(); + int version = 1; + + (void) JsonbToCString(jtext, VARDATA(jb), VARSIZE(jb)); + + pq_begintypsend(&buf); + pq_sendint(&buf, version, 1); + pq_sendtext(&buf, jtext->data, jtext->len); + pfree(jtext->data); + pfree(jtext); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * SQL function jsonb_typeof(jsonb) -> text + * + * This function is here because the analog json function is in json.c, since + * it uses the json parser internals not exposed elsewhere. 
+ */ +Datum +jsonb_typeof(PG_FUNCTION_ARGS) +{ + Jsonb *in = PG_GETARG_JSONB(0); + JsonbIterator *it; + JsonbValue v; + char *result; + + if (JB_ROOT_IS_OBJECT(in)) + result = "object"; + else if (JB_ROOT_IS_ARRAY(in) && !JB_ROOT_IS_SCALAR(in)) + result = "array"; + else + { + Assert(JB_ROOT_IS_SCALAR(in)); + + it = JsonbIteratorInit(VARDATA_ANY(in)); + + /* + * A root scalar is stored as an array of one element, so we get the + * array and then its first (and only) member. + */ + (void) JsonbIteratorNext(&it, &v, true); + Assert(v.type == jbvArray); + (void) JsonbIteratorNext(&it, &v, true); + switch (v.type) + { + case jbvNull: + result = "null"; + break; + case jbvString: + result = "string"; + break; + case jbvNumeric: + result = "number"; + break; + case jbvBool: + result = "boolean"; + break; + default: + elog(ERROR, "unknown jsonb scalar type"); + } + } + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * jsonb_from_cstring + * + * Turns json string into a jsonb Datum. + * + * Uses the json parser (with hooks) to construct a jsonb. 
+ */ +static inline Datum +jsonb_from_cstring(char *json, int len) +{ + JsonLexContext *lex; + JsonbInState state; + JsonSemAction sem; + + memset(&state, 0, sizeof(state)); + memset(&sem, 0, sizeof(sem)); + lex = makeJsonLexContextCstringLen(json, len, true); + + sem.semstate = (void *) &state; + + sem.object_start = jsonb_in_object_start; + sem.array_start = jsonb_in_array_start; + sem.object_end = jsonb_in_object_end; + sem.array_end = jsonb_in_array_end; + sem.scalar = jsonb_in_scalar; + sem.object_field_start = jsonb_in_object_field_start; + + pg_parse_json(lex, &sem); + + /* after parsing, the item member has the composed jsonb structure */ + PG_RETURN_POINTER(JsonbValueToJsonb(state.res)); +} + +static size_t +checkStringLen(size_t len) +{ + if (len > JENTRY_POSMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("string too long to represent as jsonb string"), + errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.", + JENTRY_POSMASK))); + + return len; +} + +static void +jsonb_in_object_start(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_OBJECT, NULL); +} + +static void +jsonb_in_object_end(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_END_OBJECT, NULL); +} + +static void +jsonb_in_array_start(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, NULL); +} + +static void +jsonb_in_array_end(void *pstate) +{ + JsonbInState *_state = (JsonbInState *) pstate; + + _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL); +} + +static void +jsonb_in_object_field_start(void *pstate, char *fname, bool isnull) +{ + JsonbInState *_state = (JsonbInState *) pstate; + JsonbValue v; + + Assert (fname != NULL); + v.type = jbvString; + v.string.len = 
checkStringLen(strlen(fname)); + v.string.val = pnstrdup(fname, v.string.len); + v.estSize = sizeof(JEntry) + v.string.len; + + _state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v); +} + +static void +jsonb_put_escaped_value(StringInfo out, JsonbValue * scalarVal) +{ + switch (scalarVal->type) + { + case jbvNull: + appendBinaryStringInfo(out, "null", 4); + break; + case jbvString: + escape_json(out, pnstrdup(scalarVal->string.val, scalarVal->string.len)); + break; + case jbvNumeric: + appendStringInfoString(out, + DatumGetCString(DirectFunctionCall1(numeric_out, + PointerGetDatum(scalarVal->numeric)))); + break; + case jbvBool: + if (scalarVal->boolean) + appendBinaryStringInfo(out, "true", 4); + else + appendBinaryStringInfo(out, "false", 5); + break; + default: + elog(ERROR, "unknown jsonb scalar type"); + } +} + +/* + * For jsonb we always want the de-escaped value - that's what's in token + */ +static void +jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype) +{ + JsonbInState *_state = (JsonbInState *) pstate; + JsonbValue v; + + v.estSize = sizeof(JEntry); + + switch (tokentype) + { + + case JSON_TOKEN_STRING: + Assert (token != NULL); + v.type = jbvString; + v.string.len = checkStringLen(strlen(token)); + v.string.val = pnstrdup(token, v.string.len); + v.estSize += v.string.len; + break; + case JSON_TOKEN_NUMBER: + /* + * No need to check size of numeric values, because maximum numeric + * size is well below the JsonbValue restriction + */ + Assert (token != NULL); + v.type = jbvNumeric; + v.numeric = DatumGetNumeric(DirectFunctionCall3(numeric_in, CStringGetDatum(token), 0, -1)); + v.estSize += VARSIZE_ANY(v.numeric) + sizeof(JEntry) /* alignment */ ; + break; + case JSON_TOKEN_TRUE: + v.type = jbvBool; + v.boolean = true; + break; + case JSON_TOKEN_FALSE: + v.type = jbvBool; + v.boolean = false; + break; + case JSON_TOKEN_NULL: + v.type = jbvNull; + break; + default: + /* should not be possible */ + elog(ERROR, "invalid json token 
type"); + break; + } + + if (_state->parseState == NULL) + { + /* single scalar */ + JsonbValue va; + + va.type = jbvArray; + va.array.rawScalar = true; + va.array.nElems = 1; + + _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, &va); + _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v); + _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL); + } + else + { + JsonbValue *o = &_state->parseState->contVal; + + switch (o->type) + { + case jbvArray: + _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v); + break; + case jbvObject: + _state->res = pushJsonbValue(&_state->parseState, WJB_VALUE, &v); + break; + default: + elog(ERROR, "unexpected parent of nested structure"); + } + } +} + +/* + * JsonbToCString + * Converts jsonb value to a C-string. + * + * If 'out' argument is non-null, the resulting C-string is stored inside the + * StringBuffer. The resulting string is always returned. + * + * A typical case for passing the StringInfo in rather than NULL is where the + * caller wants access to the len attribute without having to call strlen, e.g. + * if they are converting it to a text* object. + */ +char * +JsonbToCString(StringInfo out, JsonbSuperHeader in, int estimated_len) +{ + bool first = true; + JsonbIterator *it; + int type = 0; + JsonbValue v; + int level = 0; + bool redo_switch = false; + + if (out == NULL) + out = makeStringInfo(); + + enlargeStringInfo(out, (estimated_len >= 0) ? 
estimated_len : 64); + + it = JsonbIteratorInit(in); + + while (redo_switch || + ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)) + { + redo_switch = false; + switch (type) + { + case WJB_BEGIN_ARRAY: + if (!first) + appendBinaryStringInfo(out, ", ", 2); + first = true; + + if (!v.array.rawScalar) + appendStringInfoChar(out, '['); + level++; + break; + case WJB_BEGIN_OBJECT: + if (!first) + appendBinaryStringInfo(out, ", ", 2); + first = true; + appendStringInfoCharMacro(out, '{'); + + level++; + break; + case WJB_KEY: + if (!first) + appendBinaryStringInfo(out, ", ", 2); + first = true; + + /* json rules guarantee this is a string */ + jsonb_put_escaped_value(out, &v); + appendBinaryStringInfo(out, ": ", 2); + + type = JsonbIteratorNext(&it, &v, false); + if (type == WJB_VALUE) + { + first = false; + jsonb_put_escaped_value(out, &v); + } + else + { + Assert(type == WJB_BEGIN_OBJECT || type == WJB_BEGIN_ARRAY); + + /* + * We need to rerun the current switch() since we need to + * output the object which we just got from the iterator + * before calling the iterator again. 
+ */ + redo_switch = true; + } + break; + case WJB_ELEM: + if (!first) + appendBinaryStringInfo(out, ", ", 2); + else + first = false; + + jsonb_put_escaped_value(out, &v); + break; + case WJB_END_ARRAY: + level--; + if (!v.array.rawScalar) + appendStringInfoChar(out, ']'); + first = false; + break; + case WJB_END_OBJECT: + level--; + appendStringInfoCharMacro(out, '}'); + first = false; + break; + default: + elog(ERROR, "unknown flag of jsonb iterator"); + } + } + + Assert(level == 0); + + return out->data; +} diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c new file mode 100644 index 00000000000..4a6b8fd6888 --- /dev/null +++ b/src/backend/utils/adt/jsonb_gin.c @@ -0,0 +1,646 @@ +/*------------------------------------------------------------------------- + * + * jsonb_gin.c + * GIN support functions for jsonb + * + * Copyright (c) 2014, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_gin.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gin.h" +#include "access/skey.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/jsonb.h" + +typedef struct PathHashStack +{ + uint32 hash; + struct PathHashStack *parent; +} PathHashStack; + +static text *make_text_key(const char *str, int len, char flag); +static text *make_scalar_key(const JsonbValue * scalarVal, char flag); + +/* + * + * jsonb_ops GIN opclass support functions + * + */ +Datum +gin_compare_jsonb(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int32 result; + char *a1p, + *a2p; + int len1, + len2; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + /* Compare text as bttextcmp does, but always using C collation */ + result = varstr_cmp(a1p, len1, a2p, len2, 
C_COLLATION_OID); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + +Datum +gin_extract_jsonb(PG_FUNCTION_ARGS) +{ + Jsonb *jb = (Jsonb *) PG_GETARG_JSONB(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + Datum *entries = NULL; + int total = 2 * JB_ROOT_COUNT(jb); + int i = 0, + r; + JsonbIterator *it; + JsonbValue v; + + if (total == 0) + { + *nentries = 0; + PG_RETURN_POINTER(NULL); + } + + entries = (Datum *) palloc(sizeof(Datum) * total); + + it = JsonbIteratorInit(VARDATA(jb)); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + if (i >= total) + { + total *= 2; + entries = (Datum *) repalloc(entries, sizeof(Datum) * total); + } + + /* + * Serialize keys and elements equivalently, but only when elements + * are Jsonb strings. Otherwise, serialize elements as values. Array + * elements are indexed as keys, for the benefit of + * JsonbExistsStrategyNumber. Our definition of existence does not + * allow for checking the existence of a non-jbvString element (just + * like the definition of the underlying operator), because the + * operator takes a text rhs argument (which is taken as a proxy for an + * equivalent Jsonb string). + * + * The way existence is represented does not preclude an alternative + * existence operator, that takes as its rhs value an arbitrarily + * internally-typed Jsonb. The only reason that isn't the case here is + * that the existence operator is only really intended to determine if + * an object has a certain key (object pair keys are of course + * invariably strings), which is extended to jsonb arrays. You could + * think of the default Jsonb definition of existence as being + * equivalent to a definition where all types of scalar array elements + * are keys that we can check the existence of, while just forbidding + * non-string notation. 
This inflexibility prevents the user from + * having to qualify that the rhs string is a raw scalar string (that + * is, naturally no internal string quoting in required for the text + * argument), and allows us to not set the reset flag for + * JsonbExistsStrategyNumber, since we know that keys are strings for + * both objects and arrays, and don't have to further account for type + * mismatch. Not having to set the reset flag makes it less than + * tempting to tighten up the definition of existence to preclude array + * elements entirely, which would arguably be a simpler alternative. + * In any case the infrastructure used to implement the existence + * operator could trivially support this hypothetical, slightly + * distinct definition of existence. + */ + switch (r) + { + case WJB_KEY: + /* Serialize key separately, for existence strategies */ + entries[i++] = PointerGetDatum(make_scalar_key(&v, JKEYELEM)); + break; + case WJB_ELEM: + if (v.type == jbvString) + entries[i++] = PointerGetDatum(make_scalar_key(&v, JKEYELEM)); + else + entries[i++] = PointerGetDatum(make_scalar_key(&v, JVAL)); + break; + case WJB_VALUE: + entries[i++] = PointerGetDatum(make_scalar_key(&v, JVAL)); + break; + default: + continue; + } + } + + *nentries = i; + + PG_RETURN_POINTER(entries); +} + +Datum +gin_extract_jsonb_query(PG_FUNCTION_ARGS) +{ + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries; + + if (strategy == JsonbContainsStrategyNumber) + { + /* Query is a jsonb, so just apply gin_extract_jsonb... 
*/ + entries = (Datum *) + DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, + PG_GETARG_DATUM(0), + PointerGetDatum(nentries))); + /* ...although "contains {}" requires a full index scan */ + if (entries == NULL) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else if (strategy == JsonbExistsStrategyNumber) + { + text *query = PG_GETARG_TEXT_PP(0); + text *item; + + *nentries = 1; + entries = (Datum *) palloc(sizeof(Datum)); + item = make_text_key(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query), + JKEYELEM); + entries[0] = PointerGetDatum(item); + } + else if (strategy == JsonbExistsAnyStrategyNumber || + strategy == JsonbExistsAllStrategyNumber) + { + ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); + Datum *key_datums; + bool *key_nulls; + int key_count; + int i, + j; + text *item; + + deconstruct_array(query, + TEXTOID, -1, false, 'i', + &key_datums, &key_nulls, &key_count); + + entries = (Datum *) palloc(sizeof(Datum) * key_count); + + for (i = 0, j = 0; i < key_count; ++i) + { + /* Nulls in the array are ignored */ + if (key_nulls[i]) + continue; + item = make_text_key(VARDATA(key_datums[i]), + VARSIZE(key_datums[i]) - VARHDRSZ, + JKEYELEM); + entries[j++] = PointerGetDatum(item); + } + + *nentries = j; + /* ExistsAll with no keys should match everything */ + if (j == 0 && strategy == JsonbExistsAllStrategyNumber) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else + { + elog(ERROR, "unrecognized strategy number: %d", strategy); + entries = NULL; /* keep compiler quiet */ + } + + PG_RETURN_POINTER(entries); +} + +Datum +gin_consistent_jsonb(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB(2); */ + int32 nkeys = PG_GETARG_INT32(3); + + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res = true; + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + /* + * Index doesn't have information 
about correspondence of Jsonb keys + * and values (as distinct from GIN keys, which a key/value pair is + * stored as), so invariably we recheck. Besides, there are some + * special rules around the containment of raw scalar arrays and + * regular arrays that are not represented here. However, if all of + * the keys are not present, that's sufficient reason to return false + * and finish immediately. + */ + *recheck = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else if (strategy == JsonbExistsStrategyNumber) + { + /* Existence of key guaranteed in default search mode */ + *recheck = false; + res = true; + } + else if (strategy == JsonbExistsAnyStrategyNumber) + { + /* Existence of key guaranteed in default search mode */ + *recheck = false; + res = true; + } + else if (strategy == JsonbExistsAllStrategyNumber) + { + /* Testing for the presence of all keys gives an exact result */ + *recheck = false; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_BOOL(res); +} + +Datum +gin_triconsistent_jsonb(PG_FUNCTION_ARGS) +{ + GinLogicValue *check = (GinLogicValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + /* Jsonb *query = PG_GETARG_JSONB(2); */ + int32 nkeys = PG_GETARG_INT32(3); + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + GinLogicValue res = GIN_TRUE; + + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + bool has_maybe = false; + + /* + * All extracted keys must be present. Combination of GIN_MAYBE and + * GIN_TRUE gives GIN_MAYBE result because then all keys may be + * present. 
+ */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + if (check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + has_maybe = true; + } + } + + /* + * Index doesn't have information about correspondence of Jsonb keys + * and values (as distinct from GIN keys, which a key/value pair is + * stored as), so invariably we recheck. This is also reflected in how + * GIN_MAYBE is given in response to there being no GIN_MAYBE input. + */ + if (!has_maybe && res == GIN_TRUE) + res = GIN_MAYBE; + } + else if (strategy == JsonbExistsStrategyNumber || + strategy == JsonbExistsAnyStrategyNumber) + { + /* Existence of key guaranteed in default search mode */ + res = GIN_FALSE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_TRUE) + { + res = GIN_TRUE; + break; + } + if (check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + } + } + } + else if (strategy == JsonbExistsAllStrategyNumber) + { + /* Testing for the presence of all keys gives an exact result */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + if (check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + } + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_GIN_LOGIC_VALUE(res); +} + +/* + * + * jsonb_hash_ops GIN opclass support functions + * + */ +Datum +gin_consistent_jsonb_hash(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + /* Jsonb *query = PG_GETARG_JSONB(2); */ + int32 nkeys = PG_GETARG_INT32(3); + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res = true; + int32 i; + + if (strategy != JsonbContainsStrategyNumber) + elog(ERROR, "unrecognized strategy number: %d", strategy); + + /* + * jsonb_hash_ops index doesn't have information about correspondence + * of Jsonb keys and values (as distinct from GIN keys, which a + * key/value pair is stored as), 
so invariably we recheck. Besides, + * there are some special rules around the containment of raw scalar + * arrays and regular arrays that are not represented here. However, + * if all of the keys are not present, that's sufficient reason to + * return false and finish immediately. + */ + *recheck = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + + PG_RETURN_BOOL(res); +} + +Datum +gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS) +{ + GinLogicValue *check = (GinLogicValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + /* Jsonb *query = PG_GETARG_JSONB(2); */ + int32 nkeys = PG_GETARG_INT32(3); + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + GinLogicValue res = GIN_TRUE; + int32 i; + bool has_maybe = false; + + if (strategy != JsonbContainsStrategyNumber) + elog(ERROR, "unrecognized strategy number: %d", strategy); + + /* + * All extracted keys must be present. A combination of GIN_MAYBE and + * GIN_TRUE induces a GIN_MAYBE result, because then all keys may be + * present. + */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + if (check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + has_maybe = true; + } + } + + /* + * jsonb_hash_ops index doesn't have information about correspondence of + * Jsonb keys and values (as distinct from GIN keys, which for this opclass + * are a hash of a pair, or a hash of just an element), so invariably we + * recheck. This is also reflected in how GIN_MAYBE is given in response + * to there being no GIN_MAYBE input. 
+ */ + if (!has_maybe && res == GIN_TRUE) + res = GIN_MAYBE; + + PG_RETURN_GIN_LOGIC_VALUE(res); +} + +Datum +gin_extract_jsonb_hash(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + int total = 2 * JB_ROOT_COUNT(jb); + JsonbIterator *it; + JsonbValue v; + PathHashStack tail; + PathHashStack *stack; + int i = 0, + r; + Datum *entries = NULL; + + if (total == 0) + { + *nentries = 0; + PG_RETURN_POINTER(NULL); + } + + entries = (Datum *) palloc(sizeof(Datum) * total); + + it = JsonbIteratorInit(VARDATA(jb)); + + tail.parent = NULL; + tail.hash = 0; + stack = &tail; + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + PathHashStack *tmp; + + if (i >= total) + { + total *= 2; + entries = (Datum *) repalloc(entries, sizeof(Datum) * total); + } + + switch (r) + { + case WJB_BEGIN_ARRAY: + case WJB_BEGIN_OBJECT: + tmp = stack; + stack = (PathHashStack *) palloc(sizeof(PathHashStack)); + + /* + * Nesting an array within another array will not alter + * innermost scalar element hash values, but that seems + * inconsequential + */ + if (tmp->parent) + { + /* + * We pass forward hashes from previous container nesting + * levels so that nested arrays with an outermost nested + * object will have element hashes mixed with the outermost + * key. It's also somewhat useful to have nested objects + * innermost values have hashes that are a function of not + * just their own key, but outer keys too. + */ + stack->hash = tmp->hash; + } + else + { + /* + * At least nested level, initialize with stable container + * type proxy value + */ + stack->hash = (r == WJB_BEGIN_ARRAY)? 
JB_FARRAY:JB_FOBJECT; + } + stack->parent = tmp; + break; + case WJB_KEY: + /* Initialize hash from parent */ + stack->hash = stack->parent->hash; + JsonbHashScalarValue(&v, &stack->hash); + break; + case WJB_ELEM: + /* Elements have parent hash mixed in separately */ + stack->hash = stack->parent->hash; + case WJB_VALUE: + /* Element/value case */ + JsonbHashScalarValue(&v, &stack->hash); + entries[i++] = stack->hash; + break; + case WJB_END_ARRAY: + case WJB_END_OBJECT: + /* Pop the stack */ + tmp = stack->parent; + pfree(stack); + stack = tmp; + break; + default: + elog(ERROR, "invalid JsonbIteratorNext rc: %d", r); + } + } + + *nentries = i; + + PG_RETURN_POINTER(entries); +} + +Datum +gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS) +{ + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries; + + if (strategy != JsonbContainsStrategyNumber) + elog(ERROR, "unrecognized strategy number: %d", strategy); + + /* Query is a jsonb, so just apply gin_extract_jsonb... */ + entries = (Datum *) + DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_hash, + PG_GETARG_DATUM(0), + PointerGetDatum(nentries))); + + /* ...although "contains {}" requires a full index scan */ + if (entries == NULL) + *searchMode = GIN_SEARCH_MODE_ALL; + + PG_RETURN_POINTER(entries); +} + +/* + * Build a text value from a cstring and flag suitable for storage as a key + * value + */ +static text * +make_text_key(const char *str, int len, char flag) +{ + text *item; + + item = (text *) palloc(VARHDRSZ + len + 1); + SET_VARSIZE(item, VARHDRSZ + len + 1); + + *VARDATA(item) = flag; + + memcpy(VARDATA(item) + 1, str, len); + + return item; +} + +/* + * Create a textual representation of a jsonbValue for GIN storage. 
+ */ +static text * +make_scalar_key(const JsonbValue * scalarVal, char flag) +{ + text *item; + char *cstr; + + switch (scalarVal->type) + { + case jbvNull: + item = make_text_key("n", 1, flag); + break; + case jbvBool: + item = make_text_key(scalarVal->boolean ? "t" : "f", 1, flag); + break; + case jbvNumeric: + /* + * A normalized textual representation, free of trailing zeroes is + * is required. + * + * It isn't ideal that numerics are stored in a relatively bulky + * textual format. However, it's a notationally convenient way of + * storing a "union" type in the GIN B-Tree, and indexing Jsonb + * strings takes precedence. + */ + cstr = numeric_normalize(scalarVal->numeric); + item = make_text_key(cstr, strlen(cstr), flag); + pfree(cstr); + break; + case jbvString: + item = make_text_key(scalarVal->string.val, scalarVal->string.len, + flag); + break; + default: + elog(ERROR, "invalid jsonb scalar type"); + } + + return item; +} diff --git a/src/backend/utils/adt/jsonb_op.c b/src/backend/utils/adt/jsonb_op.c new file mode 100644 index 00000000000..d6b1855c195 --- /dev/null +++ b/src/backend/utils/adt/jsonb_op.c @@ -0,0 +1,295 @@ +/*------------------------------------------------------------------------- + * + * jsonb_op.c + * Special operators for jsonb only, used by various index access methods + * + * Copyright (c) 2014, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_op.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "miscadmin.h" +#include "utils/jsonb.h" + +Datum +jsonb_exists(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + text *key = PG_GETARG_TEXT_PP(1); + JsonbValue kval; + JsonbValue *v = NULL; + + /* + * We only match Object keys (which are naturally always Strings), or + * string elements in arrays. In particular, we do not match non-string + * scalar elements. 
Existence of a key/element is only considered at the + * top level. No recursion occurs. + */ + kval.type = jbvString; + kval.string.val = VARDATA_ANY(key); + kval.string.len = VARSIZE_ANY_EXHDR(key); + + v = findJsonbValueFromSuperHeader(VARDATA(jb), + JB_FOBJECT | JB_FARRAY, + NULL, + &kval); + + PG_RETURN_BOOL(v != NULL); +} + +Datum +jsonb_exists_any(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1); + JsonbValue *arrKey = arrayToJsonbSortedArray(keys); + uint32 *plowbound = NULL, + lowbound = 0; + int i; + + if (arrKey == NULL || arrKey->object.nPairs == 0) + PG_RETURN_BOOL(false); + + if (JB_ROOT_IS_OBJECT(jb)) + plowbound = &lowbound; + + /* + * We exploit the fact that the pairs list is already sorted into strictly + * increasing order to narrow the findJsonbValueFromSuperHeader search; + * each search can start one entry past the previous "found" entry, or at + * the lower bound of the last search. + */ + for (i = 0; i < arrKey->array.nElems; i++) + { + if (findJsonbValueFromSuperHeader(VARDATA(jb), + JB_FOBJECT | JB_FARRAY, + plowbound, + arrKey->array.elems + i) != NULL) + PG_RETURN_BOOL(true); + } + + PG_RETURN_BOOL(false); +} + +Datum +jsonb_exists_all(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1); + JsonbValue *arrKey = arrayToJsonbSortedArray(keys); + uint32 *plowbound = NULL; + uint32 lowbound = 0; + int i; + + if (arrKey == NULL || arrKey->array.nElems == 0) + PG_RETURN_BOOL(true); + + if (JB_ROOT_IS_OBJECT(jb)) + plowbound = &lowbound; + + /* + * We exploit the fact that the pairs list is already sorted into strictly + * increasing order to narrow the findJsonbValueFromSuperHeader search; + * each search can start one entry past the previous "found" entry, or at + * the lower bound of the last search. 
+ */ + for (i = 0; i < arrKey->array.nElems; i++) + { + if (findJsonbValueFromSuperHeader(VARDATA(jb), + JB_FOBJECT | JB_FARRAY, + plowbound, + arrKey->array.elems + i) == NULL) + PG_RETURN_BOOL(false); + } + + PG_RETURN_BOOL(true); +} + +Datum +jsonb_contains(PG_FUNCTION_ARGS) +{ + Jsonb *val = PG_GETARG_JSONB(0); + Jsonb *tmpl = PG_GETARG_JSONB(1); + + JsonbIterator *it1, *it2; + + if (JB_ROOT_COUNT(val) < JB_ROOT_COUNT(tmpl) || + JB_ROOT_IS_OBJECT(val) != JB_ROOT_IS_OBJECT(tmpl)) + PG_RETURN_BOOL(false); + + it1 = JsonbIteratorInit(VARDATA(val)); + it2 = JsonbIteratorInit(VARDATA(tmpl)); + + PG_RETURN_BOOL(JsonbDeepContains(&it1, &it2)); +} + +Datum +jsonb_contained(PG_FUNCTION_ARGS) +{ + /* Commutator of "contains" */ + Jsonb *tmpl = PG_GETARG_JSONB(0); + Jsonb *val = PG_GETARG_JSONB(1); + + JsonbIterator *it1, *it2; + + if (JB_ROOT_COUNT(val) < JB_ROOT_COUNT(tmpl) || + JB_ROOT_IS_OBJECT(val) != JB_ROOT_IS_OBJECT(tmpl)) + PG_RETURN_BOOL(false); + + it1 = JsonbIteratorInit(VARDATA(val)); + it2 = JsonbIteratorInit(VARDATA(tmpl)); + + PG_RETURN_BOOL(JsonbDeepContains(&it1, &it2)); +} + +Datum +jsonb_ne(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB(0); + Jsonb *jbb = PG_GETARG_JSONB(1); + bool res; + + res = (compareJsonbSuperHeaderValue(VARDATA(jba), VARDATA(jbb)) != 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +/* + * B-Tree operator class operators, support function + */ +Datum +jsonb_lt(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB(0); + Jsonb *jbb = PG_GETARG_JSONB(1); + bool res; + + res = (compareJsonbSuperHeaderValue(VARDATA(jba), VARDATA(jbb)) < 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_gt(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB(0); + Jsonb *jbb = PG_GETARG_JSONB(1); + bool res; + + res = (compareJsonbSuperHeaderValue(VARDATA(jba), VARDATA(jbb)) > 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} 
+ +Datum +jsonb_le(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB(0); + Jsonb *jbb = PG_GETARG_JSONB(1); + bool res; + + res = (compareJsonbSuperHeaderValue(VARDATA(jba), VARDATA(jbb)) <= 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_ge(PG_FUNCTION_ARGS) +{ + + Jsonb *jba = PG_GETARG_JSONB(0); + Jsonb *jbb = PG_GETARG_JSONB(1); + bool res; + + res = (compareJsonbSuperHeaderValue(VARDATA(jba), VARDATA(jbb)) >= 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_eq(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB(0); + Jsonb *jbb = PG_GETARG_JSONB(1); + bool res; + + res = (compareJsonbSuperHeaderValue(VARDATA(jba), VARDATA(jbb)) == 0); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_BOOL(res); +} + +Datum +jsonb_cmp(PG_FUNCTION_ARGS) +{ + Jsonb *jba = PG_GETARG_JSONB(0); + Jsonb *jbb = PG_GETARG_JSONB(1); + int res; + + res = compareJsonbSuperHeaderValue(VARDATA(jba), VARDATA(jbb)); + + PG_FREE_IF_COPY(jba, 0); + PG_FREE_IF_COPY(jbb, 1); + PG_RETURN_INT32(res); +} + +/* + * Hash operator class jsonb hashing function + */ +Datum +jsonb_hash(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + JsonbIterator *it; + int32 r; + JsonbValue v; + uint32 hash = 0; + + if (JB_ROOT_COUNT(jb) == 0) + PG_RETURN_INT32(0); + + it = JsonbIteratorInit(VARDATA(jb)); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (r) + { + /* Rotation is left to JsonbHashScalarValue() */ + case WJB_BEGIN_ARRAY: + hash ^= JB_FARRAY; + break; + case WJB_BEGIN_OBJECT: + hash ^= JB_FOBJECT; + break; + case WJB_KEY: + case WJB_VALUE: + case WJB_ELEM: + JsonbHashScalarValue(&v, &hash); + break; + case WJB_END_ARRAY: + case WJB_END_OBJECT: + break; + default: + elog(ERROR, "invalid JsonbIteratorNext rc: %d", r); + } + } + + PG_FREE_IF_COPY(jb, 0); + PG_RETURN_INT32(hash); +} diff --git a/src/backend/utils/adt/jsonb_util.c 
b/src/backend/utils/adt/jsonb_util.c new file mode 100644 index 00000000000..4a1d4451301 --- /dev/null +++ b/src/backend/utils/adt/jsonb_util.c @@ -0,0 +1,1872 @@ +/*------------------------------------------------------------------------- + * + * jsonb_util.c + * Utilities for jsonb datatype + * + * Copyright (c) 2014, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_util.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/hash.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/jsonb.h" +#include "utils/memutils.h" + +/* + * Twice as many values may be stored within pairs (for an Object) than within + * elements (for an Array), modulo the current MaxAllocSize limitation. Note + * that JSONB_MAX_PAIRS is derived from the number of possible pairs, not + * values (as is the case for arrays and their elements), because we're + * concerned about limitations on the representation of the number of pairs. + * Over twice the memory is required to store n JsonbPairs as n JsonbValues. + * It only takes exactly twice as much disk space for storage, though. The + * JsonbPair (not an actual pair of values) representation is used here because + * that is what is subject to the MaxAllocSize restriction when building an + * object. + */ +#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JENTRY_POSMASK)) +#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), \ + JENTRY_POSMASK)) + +/* + * State used while converting an arbitrary JsonbValue into a Jsonb value + * (4-byte varlena uncompressed representation of a Jsonb) + * + * ConvertLevel: Bookkeeping around particular level when converting. 
+ */ +typedef struct convertLevel +{ + uint32 i; /* Iterates once per element, or once per pair */ + uint32 *header; /* Pointer to current container header */ + JEntry *meta; /* This level's metadata */ + char *begin; /* Pointer into convertState.buffer */ +} convertLevel; + +/* + * convertState: Overall bookkeeping state for conversion + */ +typedef struct convertState +{ + /* Preallocated buffer in which to form varlena/Jsonb value */ + Jsonb *buffer; + /* Pointer into buffer */ + char *ptr; + + /* State for */ + convertLevel *allState, /* Overall state array */ + *contPtr; /* Cur container pointer (in allState) */ + + /* Current size of buffer containing allState array */ + Size levelSz; + +} convertState; + +static int compareJsonbScalarValue(JsonbValue * a, JsonbValue * b); +static int lexicalCompareJsonbStringValue(const void *a, const void *b); +static Size convertJsonb(JsonbValue * val, Jsonb* buffer); +static inline short addPaddingInt(convertState * cstate); +static void walkJsonbValueConversion(JsonbValue * val, convertState * cstate, + uint32 nestlevel); +static void putJsonbValueConversion(convertState * cstate, JsonbValue * val, + uint32 flags, uint32 level); +static void putScalarConversion(convertState * cstate, JsonbValue * scalarVal, + uint32 level, uint32 i); +static void iteratorFromContainerBuf(JsonbIterator * it, char *buffer); +static bool formIterIsContainer(JsonbIterator ** it, JsonbValue * val, + JEntry * ent, bool skipNested); +static JsonbIterator *freeAndGetParent(JsonbIterator * it); +static JsonbParseState *pushState(JsonbParseState ** pstate); +static void appendKey(JsonbParseState * pstate, JsonbValue * scalarVal); +static void appendValue(JsonbParseState * pstate, JsonbValue * scalarVal); +static void appendElement(JsonbParseState * pstate, JsonbValue * scalarVal); +static int lengthCompareJsonbStringValue(const void *a, const void *b, void *arg); +static int lengthCompareJsonbPair(const void *a, const void *b, void *arg); +static 
void uniqueifyJsonbObject(JsonbValue * object); +static void uniqueifyJsonbArray(JsonbValue * array); + +/* + * Turn an in-memory JsonbValue into a Jsonb for on-disk storage. + * + * There isn't a JsonbToJsonbValue(), because generally we find it more + * convenient to directly iterate through the Jsonb representation and only + * really convert nested scalar values. formIterIsContainer() does this, so + * that clients of the iteration code don't have to directly deal with the + * binary representation (JsonbDeepContains() is a notable exception, although + * all exceptions are internal to this module). In general, functions that + * accept a JsonbValue argument are concerned with the manipulation of scalar + * values, or simple containers of scalar values, where it would be + * inconvenient to deal with a great amount of other state. + */ +Jsonb * +JsonbValueToJsonb(JsonbValue * val) +{ + Jsonb *out; + Size sz; + + if (IsAJsonbScalar(val)) + { + /* Scalar value */ + JsonbParseState *pstate = NULL; + JsonbValue *res; + JsonbValue scalarArray; + + scalarArray.type = jbvArray; + scalarArray.array.rawScalar = true; + scalarArray.array.nElems = 1; + + pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray); + pushJsonbValue(&pstate, WJB_ELEM, val); + res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL); + + out = palloc(VARHDRSZ + res->estSize); + sz = convertJsonb(res, out); + Assert(sz <= res->estSize); + SET_VARSIZE(out, sz + VARHDRSZ); + } + else if (val->type == jbvObject || val->type == jbvArray) + { + out = palloc(VARHDRSZ + val->estSize); + sz = convertJsonb(val, out); + Assert(sz <= val->estSize); + SET_VARSIZE(out, VARHDRSZ + sz); + } + else + { + Assert(val->type == jbvBinary); + out = palloc(VARHDRSZ + val->binary.len); + SET_VARSIZE(out, VARHDRSZ + val->binary.len); + memcpy(VARDATA(out), val->binary.data, val->binary.len); + } + + return out; +} + +/* + * BT comparator worker function. 
Returns an integer less than, equal to, or + * greater than zero, indicating whether a is less than, equal to, or greater + * than b. Consistent with the requirements for a B-Tree operator class + * + * Strings are compared lexically, in contrast with other places where we use a + * much simpler comparator logic for searching through Strings. Since this is + * called from B-Tree support function 1, we're careful about not leaking + * memory here. + */ +int +compareJsonbSuperHeaderValue(JsonbSuperHeader a, JsonbSuperHeader b) +{ + JsonbIterator *ita, + *itb; + int res = 0; + + ita = JsonbIteratorInit(a); + itb = JsonbIteratorInit(b); + + do + { + JsonbValue va, + vb; + int ra, + rb; + + ra = JsonbIteratorNext(&ita, &va, false); + rb = JsonbIteratorNext(&itb, &vb, false); + + /* + * To a limited extent we'll redundantly iterate over an array/object + * while re-performing the same test without any reasonable expectation + * of the same container types having differing lengths (as when we + * process a WJB_BEGIN_OBJECT, and later the corresponding + * WJB_END_OBJECT), but no matter. + */ + if (ra == rb) + { + if (ra == WJB_DONE) + { + /* Decisively equal */ + break; + } + + if (va.type == vb.type) + { + switch (va.type) + { + case jbvString: + res = lexicalCompareJsonbStringValue(&va, &vb); + break; + case jbvNull: + case jbvNumeric: + case jbvBool: + res = compareJsonbScalarValue(&va, &vb); + break; + case jbvArray: + /* + * This could be a "raw scalar" pseudo array. That's a + * special case here though, since we still want the + * general type-based comparisons to apply, and as far + * as we're concerned a pseudo array is just a scalar. + */ + if (va.array.rawScalar != vb.array.rawScalar) + res = (va.array.rawScalar) ? -1 : 1; + if (va.array.nElems != vb.array.nElems) + res = (va.array.nElems > vb.array.nElems) ? 1 : -1; + break; + case jbvObject: + if (va.object.nPairs != vb.object.nPairs) + res = (va.object.nPairs > vb.object.nPairs) ? 
1 : -1; + break; + case jbvBinary: + elog(ERROR, "unexpected jbvBinary value"); + } + } + else + { + /* Type-defined order */ + res = (va.type > vb.type) ? 1 : -1; + } + } + else + { + /* + * It's safe to assume that the types differed. + * + * If the two values were the same container type, then there'd + * have been a chance to observe the variation in the number of + * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They + * can't be scalar types either, because then they'd have to be + * contained in containers already ruled unequal due to differing + * numbers of pairs/elements, or already directly ruled unequal + * with a call to the underlying type's comparator. + */ + Assert(va.type != vb.type); + Assert(va.type == jbvArray || va.type == jbvObject); + Assert(vb.type == jbvArray || vb.type == jbvObject); + /* Type-defined order */ + res = (va.type > vb.type) ? 1 : -1; + } + } + while (res == 0); + + while (ita != NULL) + { + JsonbIterator *i = ita->parent; + pfree(ita); + ita = i; + } + while (itb != NULL) + { + JsonbIterator *i = itb->parent; + pfree(itb); + itb = i; + } + + return res; +} + +/* + * Find value in object (i.e. the "value" part of some key/value pair in an + * object), or find a matching element if we're looking through an array. Do + * so on the basis of equality of the object keys only, or alternatively + * element values only, with a caller-supplied value "key". The "flags" + * argument allows the caller to specify which container types are of interest. + * + * This exported utility function exists to facilitate various cases concerned + * with "containment". If asked to look through an object, the caller had + * better pass a Jsonb String, because their keys can only be strings. + * Otherwise, for an array, any type of JsonbValue will do. 
+ *
+ * In order to proceed with the search, it is necessary for callers to have
+ * both specified an interest in exactly one particular container type with an
+ * appropriate flag, as well as having the pointed-to Jsonb superheader be of
+ * one of those same container types at the top level.  (Actually, we just do
+ * whichever makes sense to save callers the trouble of figuring it out - at
+ * most one can make sense, because the super header either points to an array
+ * (possibly a "raw scalar" pseudo array) or an object.)
+ *
+ * Note that we can return a jbvBinary JsonbValue if this is called on an
+ * object, but we never do so on an array.  If the caller asks to look through
+ * a container type that is not of the type pointed to by the superheader,
+ * immediately fall through and return NULL.  If we cannot find the value,
+ * return NULL.  Otherwise, return palloc()'d copy of value.
+ *
+ * lowbound can be NULL, but if not it's used to establish a point at which to
+ * start searching an object.  If the key searched for is found, lowbound is
+ * set to the position one past the matching pair; if it is not found,
+ * lowbound is set to the position at which the search ended.  Typically, this is used to
+ * exploit the ordering of objects to avoid redundant work, by also sorting a
+ * list of items to be checked using the internal sort criteria for objects
+ * (object pair keys), and then, when searching for the second or subsequent
+ * item, picking it up where we left off knowing that the second or subsequent
+ * item can not be at a point below the low bound set when the first was found.
+ * This is only useful for objects, not arrays (which have a user-defined
+ * order), so array superheader Jsonbs should just pass NULL.  Moreover, it's
+ * only useful because we only match object pairs on the basis of their key, so
+ * presumably anyone exploiting this is only interested in matching Object keys
+ * with a String.  lowbound is given in units of pairs, not underlying values.
+ */
+JsonbValue *
+findJsonbValueFromSuperHeader(JsonbSuperHeader sheader, uint32 flags,
+							  uint32 *lowbound, JsonbValue * key)
+{
+	uint32		superheader = *(uint32 *) sheader;
+	/* The JEntry array starts right after the 4-byte superheader word */
+	JEntry	   *array = (JEntry *) (sheader + sizeof(uint32));
+	int			count = (superheader & JB_CMASK);
+	/* Allocated up front; freed again below if nothing is found */
+	JsonbValue *result = palloc(sizeof(JsonbValue));
+
+	Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
+
+	if (flags & JB_FARRAY & superheader)
+	{
+		/* Element data follows the count JEntrys of the array */
+		char	   *data = (char *) (array + (superheader & JB_CMASK));
+		int			i;
+
+		/* Linear scan: only elements of the key's own type are decoded */
+		for (i = 0; i < count; i++)
+		{
+			JEntry	   *e = array + i;
+
+			if (JBE_ISNULL(*e) && key->type == jbvNull)
+			{
+				result->type = jbvNull;
+				result->estSize = sizeof(JEntry);
+			}
+			else if (JBE_ISSTRING(*e) && key->type == jbvString)
+			{
+				result->type = jbvString;
+				result->string.val = data + JBE_OFF(*e);
+				result->string.len = JBE_LEN(*e);
+				result->estSize = sizeof(JEntry) + result->string.len;
+			}
+			else if (JBE_ISNUMERIC(*e) && key->type == jbvNumeric)
+			{
+				result->type = jbvNumeric;
+				result->numeric = (Numeric) (data + INTALIGN(JBE_OFF(*e)));
+				result->estSize = 2 * sizeof(JEntry) +
+					VARSIZE_ANY(result->numeric);
+			}
+			else if (JBE_ISBOOL(*e) && key->type == jbvBool)
+			{
+				result->type = jbvBool;
+				result->boolean = JBE_ISBOOL_TRUE(*e) != 0;
+				result->estSize = sizeof(JEntry);
+			}
+			else
+				continue;
+
+			if (compareJsonbScalarValue(key, result) == 0)
+				return result;
+		}
+	}
+	else if (flags & JB_FOBJECT & superheader)
+	{
+		/* Since this is an object, account for *Pairs* of Jentrys */
+		char	   *data = (char *) (array + (superheader & JB_CMASK) * 2);
+		uint32		stopLow = lowbound ? *lowbound : 0,
+					stopMiddle;
+
+		/* Object key passed by caller must be a string */
+		Assert(key->type == jbvString);
+
+		/* Binary search on object/pair keys *only* */
+		while (stopLow < count)
+		{
+			JEntry	   *entry;
+			int			difference;
+			JsonbValue	candidate;
+
+			/*
+			 * Note how we compensate for the fact that we're iterating through
+			 * pairs (not entries) throughout.
+			 */
+			stopMiddle = stopLow + (count - stopLow) / 2;
+
+			entry = array + stopMiddle * 2;
+
+			candidate.type = jbvString;
+			candidate.string.val = data + JBE_OFF(*entry);
+			candidate.string.len = JBE_LEN(*entry);
+			candidate.estSize = sizeof(JEntry) + candidate.string.len;
+
+			difference = lengthCompareJsonbStringValue(&candidate, key, NULL);
+
+			if (difference == 0)
+			{
+				/* Found our value (from key/value pair) */
+				JEntry	   *v = entry + 1;
+
+				/* A later search by this caller can start past this pair */
+				if (lowbound)
+					*lowbound = stopMiddle + 1;
+
+				if (JBE_ISNULL(*v))
+				{
+					result->type = jbvNull;
+					result->estSize = sizeof(JEntry);
+				}
+				else if (JBE_ISSTRING(*v))
+				{
+					result->type = jbvString;
+					result->string.val = data + JBE_OFF(*v);
+					result->string.len = JBE_LEN(*v);
+					result->estSize = sizeof(JEntry) + result->string.len;
+				}
+				else if (JBE_ISNUMERIC(*v))
+				{
+					result->type = jbvNumeric;
+					result->numeric = (Numeric) (data + INTALIGN(JBE_OFF(*v)));
+					result->estSize = 2 * sizeof(JEntry) +
+						VARSIZE_ANY(result->numeric);
+				}
+				else if (JBE_ISBOOL(*v))
+				{
+					result->type = jbvBool;
+					result->boolean = JBE_ISBOOL_TRUE(*v) != 0;
+					result->estSize = sizeof(JEntry);
+				}
+				else
+				{
+					/*
+					 * See header comments to understand why this never happens
+					 * with arrays
+					 */
+					result->type = jbvBinary;
+					result->binary.data = data + INTALIGN(JBE_OFF(*v));
+					/* Length excludes the alignment padding skipped above */
+					result->binary.len = JBE_LEN(*v) -
+						(INTALIGN(JBE_OFF(*v)) - JBE_OFF(*v));
+					result->estSize = 2 * sizeof(JEntry) + result->binary.len;
+				}
+
+				return result;
+			}
+			else
+			{
+				/* Halve the search interval; count doubles as the upper bound */
+				if (difference < 0)
+					stopLow = stopMiddle + 1;
+				else
+					count = stopMiddle;
+			}
+		}
+
+		/* Not found: report where the search ended */
+		if (lowbound)
+			*lowbound = stopLow;
+	}
+
+	/* Not found */
+	pfree(result);
+	return NULL;
+}
+
+/*
+ * Get i-th value of Jsonb array from superheader.
+ *
+ * Returns palloc()'d copy of value.
+ */ +JsonbValue * +getIthJsonbValueFromSuperHeader(JsonbSuperHeader sheader, uint32 i) +{ + uint32 superheader = *(uint32 *) sheader; + JsonbValue *result; + JEntry *array, + *e; + char *data; + + result = palloc(sizeof(JsonbValue)); + + if (i >= (superheader & JB_CMASK)) + return NULL; + + array = (JEntry *) (sheader + sizeof(uint32)); + + if (superheader & JB_FARRAY) + { + e = array + i; + data = (char *) (array + (superheader & JB_CMASK)); + } + else + { + elog(ERROR, "not a jsonb array"); + } + + if (JBE_ISNULL(*e)) + { + result->type = jbvNull; + result->estSize = sizeof(JEntry); + } + else if (JBE_ISSTRING(*e)) + { + result->type = jbvString; + result->string.val = data + JBE_OFF(*e); + result->string.len = JBE_LEN(*e); + result->estSize = sizeof(JEntry) + result->string.len; + } + else if (JBE_ISNUMERIC(*e)) + { + result->type = jbvNumeric; + result->numeric = (Numeric) (data + INTALIGN(JBE_OFF(*e))); + result->estSize = 2 * sizeof(JEntry) + VARSIZE_ANY(result->numeric); + } + else if (JBE_ISBOOL(*e)) + { + result->type = jbvBool; + result->boolean = JBE_ISBOOL_TRUE(*e) != 0; + result->estSize = sizeof(JEntry); + } + else + { + result->type = jbvBinary; + result->binary.data = data + INTALIGN(JBE_OFF(*e)); + result->binary.len = JBE_LEN(*e) - (INTALIGN(JBE_OFF(*e)) - JBE_OFF(*e)); + result->estSize = result->binary.len + 2 * sizeof(JEntry); + } + + return result; +} + +/* + * Push JsonbValue into JsonbParseState. + * + * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory + * JsonbValue to a Jsonb. + * + * Initial state of *JsonbParseState is NULL, since it'll be allocated here + * originally (caller will get JsonbParseState back by reference). + * + * Only sequential tokens pertaining to non-container types should pass a + * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a + * "raw scalar" pseudo array to append that. 
+ *
+ * Returns a pointer to the container being built for the begin and end
+ * tokens, and NULL for WJB_KEY, WJB_VALUE and WJB_ELEM.
+ */
+JsonbValue *
+pushJsonbValue(JsonbParseState ** pstate, int seq, JsonbValue * scalarVal)
+{
+	JsonbValue *result = NULL;
+
+	switch (seq)
+	{
+		case WJB_BEGIN_ARRAY:
+			Assert(!scalarVal || scalarVal->array.rawScalar);
+			*pstate = pushState(pstate);
+			result = &(*pstate)->contVal;
+			(*pstate)->contVal.type = jbvArray;
+			(*pstate)->contVal.estSize = 3 * sizeof(JEntry);
+			(*pstate)->contVal.array.nElems = 0;
+			(*pstate)->contVal.array.rawScalar = (scalarVal &&
+											 scalarVal->array.rawScalar);
+			/* Size the element buffer from the caller's hint, else use 4 */
+			if (scalarVal && scalarVal->array.nElems > 0)
+			{
+				/* Assume that this array is still really a scalar */
+				Assert(scalarVal->type == jbvArray);
+				(*pstate)->size = scalarVal->array.nElems;
+			}
+			else
+			{
+				(*pstate)->size = 4;
+			}
+			(*pstate)->contVal.array.elems = palloc(sizeof(JsonbValue) *
+													(*pstate)->size);
+			break;
+		case WJB_BEGIN_OBJECT:
+			Assert(!scalarVal);
+			*pstate = pushState(pstate);
+			result = &(*pstate)->contVal;
+			(*pstate)->contVal.type = jbvObject;
+			(*pstate)->contVal.estSize = 3 * sizeof(JEntry);
+			(*pstate)->contVal.object.nPairs = 0;
+			(*pstate)->size = 4;
+			(*pstate)->contVal.object.pairs = palloc(sizeof(JsonbPair) *
+													 (*pstate)->size);
+			break;
+		case WJB_KEY:
+			Assert(scalarVal->type == jbvString);
+			appendKey(*pstate, scalarVal);
+			break;
+		case WJB_VALUE:
+			Assert(IsAJsonbScalar(scalarVal) ||
+				   scalarVal->type == jbvBinary);
+			appendValue(*pstate, scalarVal);
+			break;
+		case WJB_ELEM:
+			Assert(IsAJsonbScalar(scalarVal) ||
+				   scalarVal->type == jbvBinary);
+			appendElement(*pstate, scalarVal);
+			break;
+		case WJB_END_OBJECT:
+			uniqueifyJsonbObject(&(*pstate)->contVal);
+			/* FALLTHROUGH: the rest is shared with WJB_END_ARRAY */
+		case WJB_END_ARRAY:
+			/* Steps here common to WJB_END_OBJECT case */
+			Assert(!scalarVal);
+			result = &(*pstate)->contVal;
+
+			/*
+			 * Pop stack and push current array/object as value in parent
+			 * array/object
+			 */
+			*pstate = (*pstate)->next;
+			if (*pstate)
+			{
+				switch ((*pstate)->contVal.type)
+				{
+					case jbvArray:
+						appendElement(*pstate, result);
+						break;
+					case jbvObject:
+						appendValue(*pstate, result);
+						break;
+					default:
+						elog(ERROR, "invalid jsonb container type");
+				}
+			}
+			break;
+		default:
+			elog(ERROR, "unrecognized jsonb sequential processing token");
+	}
+
+	return result;
+}
+
+/*
+ * Given a Jsonb superheader, expand to JsonbIterator to iterate over items
+ * fully expanded to in-memory representation for manipulation.
+ *
+ * The returned iterator is palloc'd and has no parent.
+ *
+ * See JsonbIteratorNext() for notes on memory management.
+ */
+JsonbIterator *
+JsonbIteratorInit(JsonbSuperHeader sheader)
+{
+	JsonbIterator *it = palloc(sizeof(JsonbIterator));
+
+	iteratorFromContainerBuf(it, sheader);
+	it->parent = NULL;
+
+	return it;
+}
+
+/*
+ * Get next JsonbValue while iterating
+ *
+ * Caller should initially pass their own, original iterator.  They may get
+ * back a child iterator palloc()'d here instead.  The function can be relied
+ * on to free those child iterators, lest the memory allocated for highly
+ * nested objects become unreasonable, but only if callers don't end iteration
+ * early (by breaking upon having found something in a search, for example).
+ *
+ * Callers in such a scenario, that are particularly sensitive to leaking
+ * memory in a long-lived context may walk the ancestral tree from the final
+ * iterator we left them with to its oldest ancestor, pfree()ing as they go.
+ * They do not have to free any other memory previously allocated for iterators
+ * but not accessible as direct ancestors of the iterator they're last passed
+ * back.
+ *
+ * Returns "Jsonb sequential processing" token value.  Iterator "state"
+ * reflects the current stage of the process in a less granular fashion, and is
+ * mostly used here to track things internally with respect to particular
+ * iterators.
+ *
+ * Clients of this function should not have to handle any jbvBinary values
+ * (since recursive calls will deal with this), provided skipNested is false.
+ * It is our job to expand the jbvBinary representation without bothering them
+ * with it.
However, clients should not take it upon themselves to touch array + * or Object element/pair buffers, since their element/pair pointers are + * garbage. + */ +int +JsonbIteratorNext(JsonbIterator ** it, JsonbValue * val, bool skipNested) +{ + JsonbIterState state; + + /* Guard against stack overflow due to overly complex Jsonb */ + check_stack_depth(); + + /* Recursive caller may have original caller's iterator */ + if (*it == NULL) + return WJB_DONE; + + state = (*it)->state; + + if ((*it)->containerType == JB_FARRAY) + { + if (state == jbi_start) + { + /* Set v to array on first array call */ + val->type = jbvArray; + val->array.nElems = (*it)->nElems; + /* + * v->array.elems is not actually set, because we aren't doing a + * full conversion + */ + val->array.rawScalar = (*it)->isScalar; + (*it)->i = 0; + /* Set state for next call */ + (*it)->state = jbi_elem; + return WJB_BEGIN_ARRAY; + } + else if (state == jbi_elem) + { + if ((*it)->i >= (*it)->nElems) + { + /* + * All elements within array already processed. Report this to + * caller, and give it back original parent iterator (which + * independently tracks iteration progress at its level of + * nesting). + */ + *it = freeAndGetParent(*it); + return WJB_END_ARRAY; + } + else if (formIterIsContainer(it, val, &(*it)->meta[(*it)->i++], + skipNested)) + { + /* + * New child iterator acquired within formIterIsContainer. + * Recurse into container. Don't directly return jbvBinary + * value to top-level client. 
+ */ + return JsonbIteratorNext(it, val, skipNested); + } + else + { + /* Scalar item in array */ + return WJB_ELEM; + } + } + } + else if ((*it)->containerType == JB_FOBJECT) + { + if (state == jbi_start) + { + /* Set v to object on first object call */ + val->type = jbvObject; + val->object.nPairs = (*it)->nElems; + /* + * v->object.pairs is not actually set, because we aren't doing a + * full conversion + */ + (*it)->i = 0; + /* Set state for next call */ + (*it)->state = jbi_key; + return WJB_BEGIN_OBJECT; + } + else if (state == jbi_key) + { + if ((*it)->i >= (*it)->nElems) + { + /* + * All pairs within object already processed. Report this to + * caller, and give it back original containing iterator (which + * independently tracks iteration progress at its level of + * nesting). + */ + *it = freeAndGetParent(*it); + return WJB_END_OBJECT; + } + else + { + /* + * Return binary item key (ensured by setting skipNested to + * false directly). No child iterator, no further recursion. + * When control reaches here, it's probably from a recursive + * call. + */ + if (formIterIsContainer(it, val, &(*it)->meta[(*it)->i * 2], false)) + elog(ERROR, "unexpected container as object key"); + + Assert(val->type == jbvString); + /* Set state for next call */ + (*it)->state = jbi_value; + return WJB_KEY; + } + } + else if (state == jbi_value) + { + /* Set state for next call */ + (*it)->state = jbi_key; + + /* + * Value may be a container, in which case we recurse with new, + * child iterator. If it is, don't bother !skipNested callers with + * dealing with the jbvBinary representation. + */ + if (formIterIsContainer(it, val, &(*it)->meta[((*it)->i++) * 2 + 1], + skipNested)) + return JsonbIteratorNext(it, val, skipNested); + else + return WJB_VALUE; + } + } + + elog(ERROR, "invalid iterator state"); +} + +/* + * Worker for "contains" operator's function + * + * Formally speaking, containment is top-down, unordered subtree isomorphism. 
+ * + * Takes iterators that belong to some container type. These iterators + * "belong" to those values in the sense that they've just been initialized in + * respect of them by the caller (perhaps in a nested fashion). + * + * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level. + * We determine if mContained is contained within val. + */ +bool +JsonbDeepContains(JsonbIterator ** val, JsonbIterator ** mContained) +{ + uint32 rval, + rcont; + JsonbValue vval, + vcontained; + /* + * Guard against stack overflow due to overly complex Jsonb. + * + * Functions called here independently take this precaution, but that might + * not be sufficient since this is also a recursive function. + */ + check_stack_depth(); + + rval = JsonbIteratorNext(val, &vval, false); + rcont = JsonbIteratorNext(mContained, &vcontained, false); + + if (rval != rcont) + { + /* + * The differing return values can immediately be taken as indicating + * two differing container types at this nesting level, which is + * sufficient reason to give up entirely (but it should be the case + * that they're both some container type). + */ + Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY); + Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY); + return false; + } + else if (rcont == WJB_BEGIN_OBJECT) + { + JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */ + + Assert(vcontained.type == jbvObject); + + /* Work through rhs "is it contained within?" object */ + for (;;) + { + rcont = JsonbIteratorNext(mContained, &vcontained, false); + + /* + * When we get through caller's rhs "is it contained within?" + * object without failing to find one of its values, it's + * contained. + */ + if (rcont == WJB_END_OBJECT) + return true; + + Assert(rcont == WJB_KEY); + + /* First, find value by key... 
*/ + lhsVal = findJsonbValueFromSuperHeader((*val)->buffer, + JB_FOBJECT, + NULL, + &vcontained); + + if (!lhsVal) + return false; + + /* + * ...at this stage it is apparent that there is at least a key + * match for this rhs pair. + */ + rcont = JsonbIteratorNext(mContained, &vcontained, true); + + Assert(rcont == WJB_VALUE); + + /* + * Compare rhs pair's value with lhs pair's value just found using + * key + */ + if (lhsVal->type != vcontained.type) + { + return false; + } + else if (IsAJsonbScalar(lhsVal)) + { + if (compareJsonbScalarValue(lhsVal, &vcontained) != 0) + return false; + } + else + { + /* Nested container value (object or array) */ + JsonbIterator *nestval, *nestContained; + + Assert(lhsVal->type == jbvBinary); + Assert(vcontained.type == jbvBinary); + + nestval = JsonbIteratorInit(lhsVal->binary.data); + nestContained = JsonbIteratorInit(vcontained.binary.data); + + /* + * Match "value" side of rhs datum object's pair recursively. + * It's a nested structure. + * + * Note that nesting still has to "match up" at the right + * nesting sub-levels. However, there need only be zero or + * more matching pairs (or elements) at each nesting level + * (provided the *rhs* pairs/elements *all* match on each + * level), which enables searching nested structures for a + * single String or other primitive type sub-datum quite + * effectively (provided the user constructed the rhs nested + * structure such that we "know where to look"). + * + * In other words, the mapping of container nodes in the rhs + * "vcontained" Jsonb to internal nodes on the lhs is + * injective, and parent-child edges on the rhs must be mapped + * to parent-child edges on the lhs to satisfy the condition of + * containment (plus of course the mapped nodes must be equal). 
+ */ + if (!JsonbDeepContains(&nestval, &nestContained)) + return false; + } + } + } + else if (rcont == WJB_BEGIN_ARRAY) + { + JsonbValue *lhsConts = NULL; + uint32 nLhsElems = vval.array.nElems; + + Assert(vcontained.type == jbvArray); + + /* + * Handle distinction between "raw scalar" pseudo arrays, and real + * arrays. + * + * A raw scalar may contain another raw scalar, and an array may + * contain a raw scalar, but a raw scalar may not contain an array. We + * don't do something like this for the object case, since objects can + * only contain pairs, never raw scalars (a pair is represented by an + * rhs object argument with a single contained pair). + */ + if (vval.array.rawScalar && !vcontained.array.rawScalar) + return false; + + /* Work through rhs "is it contained within?" array */ + for (;;) + { + rcont = JsonbIteratorNext(mContained, &vcontained, true); + + /* + * When we get through caller's rhs "is it contained within?" array + * without failing to find one of its values, it's contained. 
+ */ + if (rcont == WJB_END_ARRAY) + return true; + + Assert(rcont == WJB_ELEM); + + if (IsAJsonbScalar(&vcontained)) + { + if (!findJsonbValueFromSuperHeader((*val)->buffer, + JB_FARRAY, + NULL, + &vcontained)) + return false; + } + else + { + uint32 i; + + /* + * If this is first container found in rhs array (at this + * depth), initialize temp lhs array of containers + */ + if (lhsConts == NULL) + { + uint32 j = 0; + + /* Make room for all possible values */ + lhsConts = palloc(sizeof(JsonbValue) * nLhsElems); + + for (i = 0; i < nLhsElems; i++) + { + /* Store all lhs elements in temp array*/ + rcont = JsonbIteratorNext(val, &vval, true); + Assert(rcont == WJB_ELEM); + + if (vval.type == jbvBinary) + lhsConts[j++] = vval; + } + + /* No container elements in temp array, so give up now */ + if (j == 0) + return false; + + /* We may have only partially filled array */ + nLhsElems = j; + } + + /* XXX: Nested array containment is O(N^2) */ + for (i = 0; i < nLhsElems; i++) + { + /* Nested container value (object or array) */ + JsonbIterator *nestval, *nestContained; + bool contains; + + nestval = JsonbIteratorInit(lhsConts[i].binary.data); + nestContained = JsonbIteratorInit(vcontained.binary.data); + + contains = JsonbDeepContains(&nestval, &nestContained); + + if (nestval) + pfree(nestval); + if (nestContained) + pfree(nestContained); + if (contains) + break; + } + + /* + * Report rhs container value is not contained if couldn't + * match rhs container to *some* lhs cont + */ + if (i == nLhsElems) + return false; + } + } + } + else + { + elog(ERROR, "invalid jsonb container type"); + } + + elog(ERROR, "unexpectedly fell off end of jsonb container"); +} + +/* + * Convert a Postgres text array to a Jsonb array, sorted and with + * de-duplicated key elements. This is used for searching an object for items + * in the array, so we enforce that the number of strings cannot exceed + * JSONB_MAX_PAIRS. 
+ */ +JsonbValue * +arrayToJsonbSortedArray(ArrayType *array) +{ + Datum *key_datums; + bool *key_nulls; + int elem_count; + JsonbValue *result; + int i, + j; + + /* Extract data for sorting */ + deconstruct_array(array, TEXTOID, -1, false, 'i', &key_datums, &key_nulls, + &elem_count); + + if (elem_count == 0) + return NULL; + + /* + * A text array uses at least eight bytes per element, so any overflow in + * "key_count * sizeof(JsonbPair)" is small enough for palloc() to catch. + * However, credible improvements to the array format could invalidate that + * assumption. Therefore, use an explicit check rather than relying on + * palloc() to complain. + */ + if (elem_count > JSONB_MAX_PAIRS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of array elements (%d) exceeds maximum allowed Jsonb pairs (%zu)", + elem_count, JSONB_MAX_PAIRS))); + + result = palloc(sizeof(JsonbValue)); + result->type = jbvArray; + result->array.rawScalar = false; + result->array.elems = palloc(sizeof(JsonbPair) * elem_count); + + for (i = 0, j = 0; i < elem_count; i++) + { + if (!key_nulls[i]) + { + result->array.elems[j].type = jbvString; + result->array.elems[j].string.val = VARDATA(key_datums[i]); + result->array.elems[j].string.len = VARSIZE(key_datums[i]) - VARHDRSZ; + j++; + } + } + result->array.nElems = j; + + uniqueifyJsonbArray(result); + return result; +} + +/* + * Hash a JsonbValue scalar value, mixing in the hash value with an existing + * hash provided by the caller. + * + * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY + * flags. + */ +void +JsonbHashScalarValue(const JsonbValue * scalarVal, uint32 * hash) +{ + int tmp; + + /* + * Combine hash values of successive keys, values and elements by rotating + * the previous value left 1 bit, then XOR'ing in the new + * key/value/element's hash value. 
+ */ + *hash = (*hash << 1) | (*hash >> 31); + switch (scalarVal->type) + { + case jbvNull: + *hash ^= 0x01; + return; + case jbvString: + tmp = hash_any((unsigned char *) scalarVal->string.val, + scalarVal->string.len); + *hash ^= tmp; + return; + case jbvNumeric: + /* Must be unaffected by trailing zeroes */ + tmp = DatumGetInt32(DirectFunctionCall1(hash_numeric, + NumericGetDatum(scalarVal->numeric))); + *hash ^= tmp; + return; + case jbvBool: + *hash ^= scalarVal->boolean? 0x02:0x04; + return; + default: + elog(ERROR, "invalid jsonb scalar type"); + } +} + +/* + * Are two scalar JsonbValues of the same type a and b equal? + * + * Does not use lexical comparisons. Therefore, it is essentially that this + * never be used against Strings for anything other than searching for values + * within a single jsonb. + */ +static int +compareJsonbScalarValue(JsonbValue * aScalar, JsonbValue * bScalar) +{ + if (aScalar->type == bScalar->type) + { + switch (aScalar->type) + { + case jbvNull: + return 0; + case jbvString: + return lengthCompareJsonbStringValue(aScalar, bScalar, NULL); + case jbvNumeric: + return DatumGetInt32(DirectFunctionCall2(numeric_cmp, + PointerGetDatum(aScalar->numeric), + PointerGetDatum(bScalar->numeric))); + case jbvBool: + if (aScalar->boolean != bScalar->boolean) + return (aScalar->boolean > bScalar->boolean) ? 1 : -1; + else + return 0; + default: + elog(ERROR, "invalid jsonb scalar type"); + } + } + elog(ERROR, "jsonb scalar type mismatch"); +} + +/* + * Standard lexical qsort() comparator of jsonb strings. + * + * Sorts strings lexically, using the default database collation. Used by + * B-Tree operators, where a lexical sort order is generally expected. 
+ */ +static int +lexicalCompareJsonbStringValue(const void *a, const void *b) +{ + const JsonbValue *va = (const JsonbValue *) a; + const JsonbValue *vb = (const JsonbValue *) b; + + Assert(va->type == jbvString); + Assert(vb->type == jbvString); + + return varstr_cmp(va->string.val, va->string.len, vb->string.val, + vb->string.len, DEFAULT_COLLATION_OID); +} + +/* + * Given a JsonbValue, convert to Jsonb and store in preallocated Jsonb buffer + * sufficiently large to fit the value + */ +static Size +convertJsonb(JsonbValue * val, Jsonb *buffer) +{ + convertState state; + Size len; + + /* Should not already have binary representation */ + Assert(val->type != jbvBinary); + + state.buffer = buffer; + /* Start from superheader */ + state.ptr = VARDATA(state.buffer); + state.levelSz = 8; + state.allState = palloc(sizeof(convertLevel) * state.levelSz); + + walkJsonbValueConversion(val, &state, 0); + + len = state.ptr - VARDATA(state.buffer); + + Assert(len <= val->estSize); + return len; +} + +/* + * Walk the tree representation of Jsonb, as part of the process of converting + * a JsonbValue to a Jsonb. + * + * This high-level function takes care of recursion into sub-containers, but at + * the top level calls putJsonbValueConversion once per sequential processing + * token (in a manner similar to generic iteration). 
+ */ +static void +walkJsonbValueConversion(JsonbValue * val, convertState * cstate, + uint32 nestlevel) +{ + int i; + + check_stack_depth(); + + if (!val) + return; + + switch (val->type) + { + case jbvArray: + + putJsonbValueConversion(cstate, val, WJB_BEGIN_ARRAY, nestlevel); + for (i = 0; i < val->array.nElems; i++) + { + if (IsAJsonbScalar(&val->array.elems[i]) || + val->array.elems[i].type == jbvBinary) + putJsonbValueConversion(cstate, val->array.elems + i, + WJB_ELEM, nestlevel); + else + walkJsonbValueConversion(val->array.elems + i, cstate, + nestlevel + 1); + } + putJsonbValueConversion(cstate, val, WJB_END_ARRAY, nestlevel); + + break; + case jbvObject: + + putJsonbValueConversion(cstate, val, WJB_BEGIN_OBJECT, nestlevel); + for (i = 0; i < val->object.nPairs; i++) + { + putJsonbValueConversion(cstate, &val->object.pairs[i].key, + WJB_KEY, nestlevel); + + if (IsAJsonbScalar(&val->object.pairs[i].value) || + val->object.pairs[i].value.type == jbvBinary) + putJsonbValueConversion(cstate, + &val->object.pairs[i].value, + WJB_VALUE, nestlevel); + else + walkJsonbValueConversion(&val->object.pairs[i].value, + cstate, nestlevel + 1); + } + putJsonbValueConversion(cstate, val, WJB_END_OBJECT, nestlevel); + + break; + default: + elog(ERROR, "unknown type of jsonb container"); + } +} + +/* + * walkJsonbValueConversion() worker. Add padding sufficient to int-align our + * access to conversion buffer. + */ +static inline +short addPaddingInt(convertState * cstate) +{ + short padlen, p; + + padlen = INTALIGN(cstate->ptr - VARDATA(cstate->buffer)) - + (cstate->ptr - VARDATA(cstate->buffer)); + + for (p = padlen; p > 0; p--) + { + *cstate->ptr = '\0'; + cstate->ptr++; + } + + return padlen; +} + +/* + * walkJsonbValueConversion() worker. + * + * As part of the process of converting an arbitrary JsonbValue to a Jsonb, + * copy over an arbitrary individual JsonbValue. This function may copy any + * type of value, even containers (Objects/arrays). 
However, it is not + * responsible for recursive aspects of walking the tree (so only top-level + * Object/array details are handled). + * + * No details about their keys/values/elements are handled recursively - + * rather, the function is called as required for the start of an Object/Array, + * and the end (i.e. there is one call per sequential processing WJB_* token). + */ +static void +putJsonbValueConversion(convertState * cstate, JsonbValue * val, uint32 flags, + uint32 level) +{ + if (level == cstate->levelSz) + { + cstate->levelSz *= 2; + cstate->allState = repalloc(cstate->allState, + sizeof(convertLevel) * cstate->levelSz); + } + + cstate->contPtr = cstate->allState + level; + + if (flags & (WJB_BEGIN_ARRAY | WJB_BEGIN_OBJECT)) + { + Assert(((flags & WJB_BEGIN_ARRAY) && val->type == jbvArray) || + ((flags & WJB_BEGIN_OBJECT) && val->type == jbvObject)); + + /* Initialize pointer into conversion buffer at this level */ + cstate->contPtr->begin = cstate->ptr; + + addPaddingInt(cstate); + + /* Initialize everything else at this level */ + cstate->contPtr->header = (uint32 *) cstate->ptr; + /* Advance past header */ + cstate->ptr += sizeof(uint32); + cstate->contPtr->meta = (JEntry *) cstate->ptr; + cstate->contPtr->i = 0; + + if (val->type == jbvArray) + { + *cstate->contPtr->header = val->array.nElems | JB_FARRAY; + cstate->ptr += sizeof(JEntry) * val->array.nElems; + + if (val->array.rawScalar) + { + Assert(val->array.nElems == 1); + Assert(level == 0); + *cstate->contPtr->header |= JB_FSCALAR; + } + } + else + { + *cstate->contPtr->header = val->object.nPairs | JB_FOBJECT; + cstate->ptr += sizeof(JEntry) * val->object.nPairs * 2; + } + } + else if (flags & WJB_ELEM) + { + putScalarConversion(cstate, val, level, cstate->contPtr->i); + cstate->contPtr->i++; + } + else if (flags & WJB_KEY) + { + Assert(val->type == jbvString); + + putScalarConversion(cstate, val, level, cstate->contPtr->i * 2); + } + else if (flags & WJB_VALUE) + { + 
putScalarConversion(cstate, val, level, cstate->contPtr->i * 2 + 1); + cstate->contPtr->i++; + } + else if (flags & (WJB_END_ARRAY | WJB_END_OBJECT)) + { + convertLevel *prevPtr; /* Prev container pointer */ + uint32 len, + i; + + Assert(((flags & WJB_END_ARRAY) && val->type == jbvArray) || + ((flags & WJB_END_OBJECT) && val->type == jbvObject)); + + if (level == 0) + return; + + len = cstate->ptr - (char *) cstate->contPtr->begin; + + prevPtr = cstate->contPtr - 1; + + if (*prevPtr->header & JB_FARRAY) + { + i = prevPtr->i; + + prevPtr->meta[i].header = JENTRY_ISNEST; + + if (i == 0) + prevPtr->meta[0].header |= JENTRY_ISFIRST | len; + else + prevPtr->meta[i].header |= + (prevPtr->meta[i - 1].header & JENTRY_POSMASK) + len; + } + else if (*prevPtr->header & JB_FOBJECT) + { + i = 2 * prevPtr->i + 1; /* Value, not key */ + + prevPtr->meta[i].header = JENTRY_ISNEST; + + prevPtr->meta[i].header |= + (prevPtr->meta[i - 1].header & JENTRY_POSMASK) + len; + } + else + { + elog(ERROR, "invalid jsonb container type"); + } + + Assert(cstate->ptr - cstate->contPtr->begin <= val->estSize); + prevPtr->i++; + } + else + { + elog(ERROR, "unknown flag encountered during jsonb tree walk"); + } +} + +/* + * As part of the process of converting an arbitrary JsonbValue to a Jsonb, + * serialize and copy a scalar value into buffer. + * + * This is a worker function for putJsonbValueConversion() (itself a worker for + * walkJsonbValueConversion()). It handles the details with regard to Jentry + * metadata peculiar to each scalar type. 
+ */ +static void +putScalarConversion(convertState * cstate, JsonbValue * scalarVal, uint32 level, + uint32 i) +{ + int numlen; + short padlen; + + cstate->contPtr = cstate->allState + level; + + if (i == 0) + cstate->contPtr->meta[0].header = JENTRY_ISFIRST; + else + cstate->contPtr->meta[i].header = 0; + + switch (scalarVal->type) + { + case jbvNull: + cstate->contPtr->meta[i].header |= JENTRY_ISNULL; + + if (i > 0) + cstate->contPtr->meta[i].header |= + cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK; + break; + case jbvString: + memcpy(cstate->ptr, scalarVal->string.val, scalarVal->string.len); + cstate->ptr += scalarVal->string.len; + + if (i == 0) + cstate->contPtr->meta[0].header |= scalarVal->string.len; + else + cstate->contPtr->meta[i].header |= + (cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK) + + scalarVal->string.len; + break; + case jbvNumeric: + numlen = VARSIZE_ANY(scalarVal->numeric); + padlen = addPaddingInt(cstate); + + memcpy(cstate->ptr, scalarVal->numeric, numlen); + cstate->ptr += numlen; + + cstate->contPtr->meta[i].header |= JENTRY_ISNUMERIC; + if (i == 0) + cstate->contPtr->meta[0].header |= padlen + numlen; + else + cstate->contPtr->meta[i].header |= + (cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK) + + padlen + numlen; + break; + case jbvBool: + cstate->contPtr->meta[i].header |= (scalarVal->boolean) ? + JENTRY_ISTRUE : JENTRY_ISFALSE; + + if (i > 0) + cstate->contPtr->meta[i].header |= + cstate->contPtr->meta[i - 1].header & JENTRY_POSMASK; + break; + default: + elog(ERROR, "invalid jsonb scalar type"); + } +} + +/* + * Given superheader pointer into buffer, initialize iterator. Must be a + * container type. 
+ */ +static void +iteratorFromContainerBuf(JsonbIterator * it, JsonbSuperHeader sheader) +{ + uint32 superheader = *(uint32 *) sheader; + + it->containerType = superheader & (JB_FARRAY | JB_FOBJECT); + it->nElems = superheader & JB_CMASK; + it->buffer = sheader; + + /* Array starts just after header */ + it->meta = (JEntry *) (sheader + sizeof(uint32)); + it->state = jbi_start; + + switch (it->containerType) + { + case JB_FARRAY: + it->dataProper = + (char *) it->meta + it->nElems * sizeof(JEntry); + it->isScalar = (superheader & JB_FSCALAR) != 0; + /* This is either a "raw scalar", or an array */ + Assert(!it->isScalar || it->nElems == 1); + break; + case JB_FOBJECT: + /* + * Offset reflects that nElems indicates JsonbPairs in an object. + * Each key and each value contain Jentry metadata just the same. + */ + it->dataProper = + (char *) it->meta + it->nElems * sizeof(JEntry) * 2; + break; + default: + elog(ERROR, "unknown type of jsonb container"); + } +} + +/* + * JsonbIteratorNext() worker + * + * Returns bool indicating if v was a non-jbvBinary container, and thus if + * further recursion is required by caller (according to its skipNested + * preference). If it is required, we set the caller's iterator for further + * recursion into the nested value. If we're going to skip nested items, just + * set v to a jbvBinary value, but don't set caller's iterator. + * + * Unlike with containers (either in this function or in any + * JsonbIteratorNext() infrastructure), we fully convert from what is + * ultimately a Jsonb on-disk representation, to a JsonbValue in-memory + * representation (for scalar values only). JsonbIteratorNext() initializes + * container Jsonbvalues, but without a sane private buffer. For scalar values + * it has to be done for real (even if we don't actually allocate more memory + * to do this. The point is that our JsonbValues scalars can be passed around + * anywhere). 
+ */ +static bool +formIterIsContainer(JsonbIterator ** it, JsonbValue * val, JEntry * ent, + bool skipNested) +{ + if (JBE_ISNULL(*ent)) + { + val->type = jbvNull; + val->estSize = sizeof(JEntry); + + return false; + } + else if (JBE_ISSTRING(*ent)) + { + val->type = jbvString; + val->string.val = (*it)->dataProper + JBE_OFF(*ent); + val->string.len = JBE_LEN(*ent); + val->estSize = sizeof(JEntry) + val->string.len; + + return false; + } + else if (JBE_ISNUMERIC(*ent)) + { + val->type = jbvNumeric; + val->numeric = (Numeric) ((*it)->dataProper + INTALIGN(JBE_OFF(*ent))); + val->estSize = 2 * sizeof(JEntry) + VARSIZE_ANY(val->numeric); + + return false; + } + else if (JBE_ISBOOL(*ent)) + { + val->type = jbvBool; + val->boolean = JBE_ISBOOL_TRUE(*ent) != 0; + val->estSize = sizeof(JEntry); + + return false; + } + else if (skipNested) + { + val->type = jbvBinary; + val->binary.data = (*it)->dataProper + INTALIGN(JBE_OFF(*ent)); + val->binary.len = JBE_LEN(*ent) - (INTALIGN(JBE_OFF(*ent)) - JBE_OFF(*ent)); + val->estSize = val->binary.len + 2 * sizeof(JEntry); + + return false; + } + else + { + /* + * Must be container type, so setup caller's iterator to point to that, + * and return indication of that. + * + * Get child iterator. 
+ */ + JsonbIterator *child = palloc(sizeof(JsonbIterator)); + + iteratorFromContainerBuf(child, + (*it)->dataProper + INTALIGN(JBE_OFF(*ent))); + + child->parent = *it; + *it = child; + + return true; + } +} + +/* + * JsonbIteratorNext() worker: Return parent, while freeing memory for current + * iterator + */ +static JsonbIterator * +freeAndGetParent(JsonbIterator * it) +{ + JsonbIterator *v = it->parent; + + pfree(it); + return v; +} + +/* + * pushJsonbValue() worker: Iteration-like forming of Jsonb + */ +static JsonbParseState * +pushState(JsonbParseState ** pstate) +{ + JsonbParseState *ns = palloc(sizeof(JsonbParseState)); + + ns->next = *pstate; + return ns; +} + +/* + * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb + */ +static void +appendKey(JsonbParseState * pstate, JsonbValue * string) +{ + JsonbValue *object = &pstate->contVal; + + Assert(object->type == jbvObject); + Assert(string->type == jbvString); + + if (object->object.nPairs >= JSONB_MAX_PAIRS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)", + JSONB_MAX_PAIRS))); + + if (object->object.nPairs >= pstate->size) + { + pstate->size *= 2; + object->object.pairs = repalloc(object->object.pairs, + sizeof(JsonbPair) * pstate->size); + } + + object->object.pairs[object->object.nPairs].key = *string; + object->object.pairs[object->object.nPairs].order = object->object.nPairs; + + object->estSize += string->estSize; +} + +/* + * pushJsonbValue() worker: Append a pair value to state when generating a + * Jsonb + */ +static void +appendValue(JsonbParseState * pstate, JsonbValue * scalarVal) +{ + JsonbValue *object = &pstate->contVal; + + Assert(object->type == jbvObject); + + object->object.pairs[object->object.nPairs++].value = *scalarVal; + object->estSize += scalarVal->estSize; +} + +/* + * pushJsonbValue() worker: Append an element to state when generating a Jsonb + */ +static void 
+appendElement(JsonbParseState * pstate, JsonbValue * scalarVal) +{ + JsonbValue *array = &pstate->contVal; + + Assert(array->type == jbvArray); + + if (array->array.nElems >= JSONB_MAX_ELEMS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)", + JSONB_MAX_ELEMS))); + + if (array->array.nElems >= pstate->size) + { + pstate->size *= 2; + array->array.elems = repalloc(array->array.elems, + sizeof(JsonbValue) * pstate->size); + } + + array->array.elems[array->array.nElems++] = *scalarVal; + array->estSize += scalarVal->estSize; +} + +/* + * Compare two jbvString JsonbValue values, a and b. + * + * This is a special qsort_arg() comparator used to sort strings in certain + * internal contexts where it is sufficient to have a well-defined sort order. + * In particular, object pair keys are sorted according to this criteria to + * facilitate cheap binary searches where we don't care about lexical sort + * order. + * + * a and b are first sorted based on their length. If a tie-breaker is + * required, only then do we consider string binary equality. + * + * Third argument 'binequal' may point to a bool. If it's set, *binequal is set + * to true iff a and b have full binary equality, since some callers have an + * interest in whether the two values are equal or merely equivalent. + */ +static int +lengthCompareJsonbStringValue(const void *a, const void *b, void *binequal) +{ + const JsonbValue *va = (const JsonbValue *) a; + const JsonbValue *vb = (const JsonbValue *) b; + int res; + + Assert(va->type == jbvString); + Assert(vb->type == jbvString); + + if (va->string.len == vb->string.len) + { + res = memcmp(va->string.val, vb->string.val, va->string.len); + if (res == 0 && binequal) + *((bool *) binequal) = true; + } + else + { + res = (va->string.len > vb->string.len) ? 1 : -1; + } + + return res; +} + +/* + * qsort_arg() comparator to compare JsonbPair values. 
+ * + * Function implemented in terms of lengthCompareJsonbStringValue(), and thus the + * same "arg setting" hack will be applied here in respect of the pair's key + * values. + * + * N.B: String comparisons here are "length-wise" + * + * Pairs with equals keys are ordered such that the order field is respected. + */ +static int +lengthCompareJsonbPair(const void *a, const void *b, void *binequal) +{ + const JsonbPair *pa = (const JsonbPair *) a; + const JsonbPair *pb = (const JsonbPair *) b; + int res; + + res = lengthCompareJsonbStringValue(&pa->key, &pb->key, binequal); + + /* + * Guarantee keeping order of equal pair. Unique algorithm will prefer + * first element as value. + */ + if (res == 0) + res = (pa->order > pb->order) ? -1 : 1; + + return res; +} + +/* + * Sort and unique-ify pairs in JsonbValue object + */ +static void +uniqueifyJsonbObject(JsonbValue * object) +{ + bool hasNonUniq = false; + + Assert(object->type == jbvObject); + + if (object->object.nPairs > 1) + qsort_arg(object->object.pairs, object->object.nPairs, sizeof(JsonbPair), + lengthCompareJsonbPair, &hasNonUniq); + + if (hasNonUniq) + { + JsonbPair *ptr = object->object.pairs + 1, + *res = object->object.pairs; + + while (ptr - object->object.pairs < object->object.nPairs) + { + /* Avoid copying over duplicate */ + if (lengthCompareJsonbStringValue(ptr, res, NULL) == 0) + { + object->estSize -= ptr->key.estSize + ptr->value.estSize; + } + else + { + res++; + if (ptr != res) + memcpy(res, ptr, sizeof(JsonbPair)); + } + ptr++; + } + + object->object.nPairs = res + 1 - object->object.pairs; + } +} + +/* + * Sort and unique-ify JsonbArray. + * + * Sorting uses internal ordering. + */ +static void +uniqueifyJsonbArray(JsonbValue * array) +{ + bool hasNonUniq = false; + + Assert(array->type == jbvArray); + + /* + * Actually sort values, determining if any were equal on the basis of full + * binary equality (rather than just having the same string length). 
+ */ + if (array->array.nElems > 1) + qsort_arg(array->array.elems, array->array.nElems, + sizeof(JsonbValue), lengthCompareJsonbStringValue, + &hasNonUniq); + + if (hasNonUniq) + { + JsonbValue *ptr = array->array.elems + 1, + *res = array->array.elems; + + while (ptr - array->array.elems < array->array.nElems) + { + /* Avoid copying over duplicate */ + if (lengthCompareJsonbStringValue(ptr, res, NULL) != 0) + { + res++; + *res = *ptr; + } + + ptr++; + } + + array->array.nElems = res + 1 - array->array.elems; + } +} diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 23203056085..f80eaeb1c6d 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * jsonfuncs.c - * Functions to process JSON data type. + * Functions to process JSON data types. * * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -27,6 +27,7 @@ #include "utils/builtins.h" #include "utils/hsearch.h" #include "utils/json.h" +#include "utils/jsonb.h" #include "utils/jsonapi.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -47,18 +48,20 @@ static void get_array_element_end(void *state, bool isnull); static void get_scalar(void *state, char *token, JsonTokenType tokentype); /* common worker function for json getter functions */ -static inline Datum get_path_all(PG_FUNCTION_ARGS, bool as_text); +static inline Datum get_path_all(FunctionCallInfo fcinfo, bool as_text); static inline text *get_worker(text *json, char *field, int elem_index, char **tpath, int *ipath, int npath, bool normalize_results); +static inline Datum get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text); /* semantic action functions for json_array_length */ static void alen_object_start(void *state); static void alen_scalar(void *state, char *token, JsonTokenType 
tokentype); static void alen_array_element_start(void *state, bool isnull); -/* common worker for json_each* functions */ -static inline Datum each_worker(PG_FUNCTION_ARGS, bool as_text); +/* common workers for json{b}_each* functions */ +static inline Datum each_worker(FunctionCallInfo fcinfo, bool as_text); +static inline Datum each_worker_jsonb(FunctionCallInfo fcinfo, bool as_text); /* semantic action functions for json_each */ static void each_object_field_start(void *state, char *fname, bool isnull); @@ -66,8 +69,9 @@ static void each_object_field_end(void *state, char *fname, bool isnull); static void each_array_start(void *state); static void each_scalar(void *state, char *token, JsonTokenType tokentype); -/* common worker for json_each* functions */ -static inline Datum elements_worker(PG_FUNCTION_ARGS, bool as_text); +/* common workers for json{b}_array_elements_* functions */ +static inline Datum elements_worker(FunctionCallInfo fcinfo, bool as_text); +static inline Datum elements_worker_jsonb(FunctionCallInfo fcinfo, bool as_text); /* semantic action functions for json_array_elements */ static void elements_object_start(void *state); @@ -79,7 +83,7 @@ static void elements_scalar(void *state, char *token, JsonTokenType tokentype); static HTAB *get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text); /* common worker for populate_record and to_record */ -static inline Datum populate_record_worker(PG_FUNCTION_ARGS, +static inline Datum populate_record_worker(FunctionCallInfo fcinfo, bool have_record_arg); /* semantic action functions for get_json_object_as_hash */ @@ -98,8 +102,13 @@ static void populate_recordset_array_start(void *state); static void populate_recordset_array_element_start(void *state, bool isnull); /* worker function for populate_recordset and to_recordset */ -static inline Datum populate_recordset_worker(PG_FUNCTION_ARGS, +static inline Datum populate_recordset_worker(FunctionCallInfo fcinfo, bool have_record_arg); +/* 
Worker that takes care of common setup for us */ +static JsonbValue *findJsonbValueFromSuperHeaderLen(JsonbSuperHeader sheader, + uint32 flags, + char *key, + uint32 keylen); /* search type classification for json_get* functions */ typedef enum @@ -225,18 +234,98 @@ typedef struct PopulateRecordsetState MemoryContext fn_mcxt; /* used to stash IO funcs */ } PopulateRecordsetState; +/* Turn a jsonb object into a record */ +static void make_row_from_rec_and_jsonb(Jsonb * element, + PopulateRecordsetState *state); + /* - * SQL function json_object-keys + * SQL function json_object_keys * * Returns the set of keys for the object argument. * * This SRF operates in value-per-call mode. It processes the * object during the first call, and the keys are simply stashed - * in an array, whise size is expanded as necessary. This is probably + * in an array, whose size is expanded as necessary. This is probably * safe enough for a list of keys of a single object, since they are * limited in size to NAMEDATALEN and the number of keys is unlikely to * be so huge that it has major memory implications. 
*/
+/*
+ * SQL function jsonb_object_keys
+ *
+ * jsonb counterpart of json_object_keys: a value-per-call SRF returning the
+ * top-level keys of a jsonb object as text.  Raises an error when the root
+ * is a scalar or an array.  All keys are copied into the multi-call memory
+ * context on the first call and handed back one per call afterwards.
+ */
+Datum
+jsonb_object_keys(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	OkeysState *state;
+	int			i;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext oldcontext;
+		Jsonb	   *jb = PG_GETARG_JSONB(0);
+		bool		skipNested = false;
+		JsonbIterator *it;
+		JsonbValue	v;
+		int			r;
+
+		if (JB_ROOT_IS_SCALAR(jb))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot call jsonb_object_keys on a scalar")));
+		else if (JB_ROOT_IS_ARRAY(jb))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot call jsonb_object_keys on an array")));
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		state = palloc(sizeof(OkeysState));
+
+		/* one result slot per root-level entry, as reported by JB_ROOT_COUNT */
+		state->result_size = JB_ROOT_COUNT(jb);
+		state->result_count = 0;
+		state->sent_count = 0;
+		state->result = palloc(state->result_size * sizeof(char *));
+
+		it = JsonbIteratorInit(VARDATA_ANY(jb));
+
+		while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
+		{
+			/* after the first token, do not descend into nested containers */
+			skipNested = true;
+
+			if (r == WJB_KEY)
+			{
+				char	   *cstr;
+
+				/* key bytes are length-counted; make a NUL-terminated copy */
+				cstr = palloc((v.string.len + 1) * sizeof(char));
+				memcpy(cstr, v.string.val, v.string.len);
+				cstr[v.string.len] = '\0';
+				state->result[state->result_count++] = cstr;
+			}
+		}
+
+
+		MemoryContextSwitchTo(oldcontext);
+		funcctx->user_fctx = (void *) state;
+
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	state = (OkeysState *) funcctx->user_fctx;
+
+	if (state->sent_count < state->result_count)
+	{
+		char	   *nxt = state->result[state->sent_count++];
+
+		SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
+	}
+
+	/* cleanup to reduce or eliminate memory leaks */
+	for (i = 0; i < state->result_count; i++)
+		pfree(state->result[i]);
+	pfree(state->result);
+	pfree(state);
+
+	SRF_RETURN_DONE(funcctx);
+}
 
 
 Datum
@@ -350,9 +439,9 @@ okeys_scalar(void *state, char *token, JsonTokenType tokentype)
 }
 
 /*
- * json getter functions
+ * json and jsonb getter functions
  * these implement the -> ->> #> and #>> operators
- * and the
json_extract_path*(json, text, ...) functions + * and the json{b?}_extract_path*(json, text, ...) functions */ @@ -373,6 +462,51 @@ json_object_field(PG_FUNCTION_ARGS) } Datum +jsonb_object_field(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + char *key = text_to_cstring(PG_GETARG_TEXT_P(1)); + int klen = strlen(key); + JsonbIterator *it; + JsonbValue v; + int r; + bool skipNested = false; + + if (JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_object_field (jsonb -> text operator) on a scalar"))); + else if (JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_object_field (jsonb -> text operator) on an array"))); + + Assert(JB_ROOT_IS_OBJECT(jb)); + + it = JsonbIteratorInit(VARDATA_ANY(jb)); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_KEY) + { + if (klen == v.string.len && strncmp(key, v.string.val, klen) == 0) + { + /* + * The next thing the iterator fetches should be the value, no + * matter what shape it is. 
+ */ + (void) JsonbIteratorNext(&it, &v, skipNested); + PG_RETURN_JSONB(JsonbValueToJsonb(&v)); + } + } + } + + PG_RETURN_NULL(); +} + +Datum json_object_field_text(PG_FUNCTION_ARGS) { text *json = PG_GETARG_TEXT_P(0); @@ -389,6 +523,74 @@ json_object_field_text(PG_FUNCTION_ARGS) } Datum +jsonb_object_field_text(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + char *key = text_to_cstring(PG_GETARG_TEXT_P(1)); + int klen = strlen(key); + JsonbIterator *it; + JsonbValue v; + int r; + bool skipNested = false; + + if (JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_object_field_text (jsonb ->> text operator) on a scalar"))); + else if (JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_object_field_text (jsonb ->> text operator) on an array"))); + + Assert(JB_ROOT_IS_OBJECT(jb)); + + it = JsonbIteratorInit(VARDATA_ANY(jb)); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_KEY) + { + if (klen == v.string.len && strncmp(key, v.string.val, klen) == 0) + { + text *result; + + /* + * The next thing the iterator fetches should be the value, no + * matter what shape it is. 
+ */ + r = JsonbIteratorNext(&it, &v, skipNested); + + /* + * if it's a scalar string it needs to be de-escaped, + * otherwise just return the text + */ + if (v.type == jbvString) + { + result = cstring_to_text_with_len(v.string.val, v.string.len); + } + else if (v.type == jbvNull) + { + PG_RETURN_NULL(); + } + else + { + StringInfo jtext = makeStringInfo(); + Jsonb *tjb = JsonbValueToJsonb(&v); + + (void) JsonbToCString(jtext, VARDATA(tjb), -1); + result = cstring_to_text_with_len(jtext->data, jtext->len); + } + PG_RETURN_TEXT_P(result); + } + } + } + + PG_RETURN_NULL(); +} + +Datum json_array_element(PG_FUNCTION_ARGS) { text *json = PG_GETARG_TEXT_P(0); @@ -404,6 +606,44 @@ json_array_element(PG_FUNCTION_ARGS) } Datum +jsonb_array_element(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + int element = PG_GETARG_INT32(1); + JsonbIterator *it; + JsonbValue v; + int r; + bool skipNested = false; + int element_number = 0; + + if (JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_array_element (jsonb -> int operator) on a scalar"))); + else if (JB_ROOT_IS_OBJECT(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_array_element (jsonb -> int operator) on an object"))); + + Assert(JB_ROOT_IS_ARRAY(jb)); + + it = JsonbIteratorInit(VARDATA_ANY(jb)); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_ELEM) + { + if (element_number++ == element) + PG_RETURN_JSONB(JsonbValueToJsonb(&v)); + } + } + + PG_RETURN_NULL(); +} + +Datum json_array_element_text(PG_FUNCTION_ARGS) { text *json = PG_GETARG_TEXT_P(0); @@ -419,6 +659,69 @@ json_array_element_text(PG_FUNCTION_ARGS) } Datum +jsonb_array_element_text(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + int element = PG_GETARG_INT32(1); + JsonbIterator *it; + JsonbValue v; + int r; + bool skipNested = false; + int element_number = 0; + + + if 
(JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_array_element_text on a scalar"))); + else if (JB_ROOT_IS_OBJECT(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_array_element_text on an object"))); + + Assert(JB_ROOT_IS_ARRAY(jb)); + + it = JsonbIteratorInit(VARDATA_ANY(jb)); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_ELEM) + { + if (element_number++ == element) + { + /* + * if it's a scalar string it needs to be de-escaped, + * otherwise just return the text + */ + text *result; + + if (v.type == jbvString) + { + result = cstring_to_text_with_len(v.string.val, v.string.len); + } + else if (v.type == jbvNull) + { + PG_RETURN_NULL(); + } + else + { + StringInfo jtext = makeStringInfo(); + Jsonb *tjb = JsonbValueToJsonb(&v); + + (void) JsonbToCString(jtext, VARDATA(tjb), -1); + result = cstring_to_text_with_len(jtext->data, jtext->len); + } + PG_RETURN_TEXT_P(result); + } + } + } + + PG_RETURN_NULL(); +} + +Datum json_extract_path(PG_FUNCTION_ARGS) { return get_path_all(fcinfo, false); @@ -434,9 +737,9 @@ json_extract_path_text(PG_FUNCTION_ARGS) * common routine for extract_path functions */ static inline Datum -get_path_all(PG_FUNCTION_ARGS, bool as_text) +get_path_all(FunctionCallInfo fcinfo, bool as_text) { - text *json = PG_GETARG_TEXT_P(0); + text *json; ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); text *result; Datum *pathtext; @@ -448,6 +751,8 @@ get_path_all(PG_FUNCTION_ARGS, bool as_text) long ind; char *endptr; + json = PG_GETARG_TEXT_P(0); + if (array_contains_nulls(path)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -486,8 +791,9 @@ get_path_all(PG_FUNCTION_ARGS, bool as_text) result = get_worker(json, NULL, -1, tpath, ipath, npath, as_text); if (result != NULL) - PG_RETURN_TEXT_P(result); + PG_RETURN_TEXT_P(result); else + /* null is NULL, regardless */ 
PG_RETURN_NULL();
 }
 
@@ -668,7 +974,7 @@ get_object_field_end(void *state, char *fname, bool isnull)
 		/*
 		 * make a text object from the string from the prevously noted json
 		 * start up to the end of the previous token (the lexer is by now
-		 * ahead of us on whatevere came after what we're interested in).
+		 * ahead of us on whatever came after what we're interested in).
 		 */
 		int			len = _state->lex->prev_token_terminator - _state->result_start;
 
@@ -822,18 +1128,139 @@ get_scalar(void *state, char *token, JsonTokenType tokentype)
 }
 
+/*
+ * SQL function jsonb_extract_path: path extraction returning jsonb.
+ */
+Datum
+jsonb_extract_path(PG_FUNCTION_ARGS)
+{
+	return get_jsonb_path_all(fcinfo, false);
+}
+
+/*
+ * SQL function jsonb_extract_path_text: path extraction returning text.
+ */
+Datum
+jsonb_extract_path_text(PG_FUNCTION_ARGS)
+{
+	return get_jsonb_path_all(fcinfo, true);
+}
+
+/*
+ * Common worker for jsonb_extract_path() and jsonb_extract_path_text().
+ *
+ * Walks the text[] path (argument 1) through the jsonb value (argument 0).
+ * At an object level the path element is looked up as a key; at an array
+ * level it is parsed as a non-negative decimal index.
+ *
+ * Returns SQL NULL when the path is empty, when a path element is not a
+ * valid index for an array level, when a step finds nothing, or (in text
+ * mode) when the value found is a jsonb null.  Raises an error if the path
+ * contains SQL NULL elements, or if the root is a scalar and the path is
+ * non-empty.
+ *
+ * In text mode a string result is returned without quoting; any other
+ * result is rendered with JsonbToCString().
+ */
+static inline Datum
+get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text)
+{
+	Jsonb	   *jb = PG_GETARG_JSONB(0);
+	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
+	Datum	   *pathtext;
+	bool	   *pathnulls;
+	int			npath;
+	int			i;
+	Jsonb	   *res;
+	bool		have_object = false,
+				have_array = false;
+	JsonbValue *jbvp = NULL;
+	JsonbValue	tv;
+	JsonbSuperHeader superHeader;
+
+	if (array_contains_nulls(path))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot call function with null path elements")));
+
+	deconstruct_array(path, TEXTOID, -1, false, 'i',
+					  &pathtext, &pathnulls, &npath);
+
+	/*
+	 * With an empty path the loop below never runs and jbvp would still be
+	 * NULL when it is dereferenced afterwards, so bail out here.
+	 */
+	if (npath <= 0)
+		PG_RETURN_NULL();
+
+	if (JB_ROOT_IS_OBJECT(jb))
+		have_object = true;
+	else if (JB_ROOT_IS_ARRAY(jb) && !JB_ROOT_IS_SCALAR(jb))
+		have_array = true;
+
+	superHeader = (JsonbSuperHeader) VARDATA(jb);
+
+	for (i = 0; i < npath; i++)
+	{
+		if (have_object)
+		{
+			jbvp = findJsonbValueFromSuperHeaderLen(superHeader,
+													JB_FOBJECT,
+													VARDATA_ANY(pathtext[i]),
+											 VARSIZE_ANY_EXHDR(pathtext[i]));
+		}
+		else if (have_array)
+		{
+			long		lindex;
+			uint32		index;
+			char	   *indextext = TextDatumGetCString(pathtext[i]);
+			char	   *endptr;
+
+			lindex = strtol(indextext, &endptr, 10);
+
+			/*
+			 * endptr == indextext means no digits were consumed (e.g. an
+			 * empty string), which must not be mistaken for index 0.
+			 */
+			if (endptr == indextext || *endptr != '\0' ||
+				lindex > INT_MAX || lindex < 0)
+				PG_RETURN_NULL();
+			index = (uint32) lindex;
+			jbvp = getIthJsonbValueFromSuperHeader(superHeader, index);
+		}
+		else
+		{
+			/* neither object nor array: only an error at the root level */
+			if (i == 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("cannot call extract path from a scalar")));
+			PG_RETURN_NULL();
+		}
+
+		if (jbvp == NULL)
+			PG_RETURN_NULL();
+		else if (i == npath - 1)
+			break;
+
+		/* set up for the next path step: what kind of container is this? */
+		if (jbvp->type == jbvBinary)
+		{
+			JsonbIterator *it = JsonbIteratorInit(jbvp->binary.data);
+			int			r;
+
+			r = JsonbIteratorNext(&it, &tv, true);
+			superHeader = (JsonbSuperHeader) jbvp->binary.data;
+			have_object = r == WJB_BEGIN_OBJECT;
+			have_array = r == WJB_BEGIN_ARRAY;
+		}
+		else
+		{
+			have_object = jbvp->type == jbvObject;
+			have_array = jbvp->type == jbvArray;
+		}
+	}
+
+	/* npath > 0 and every early exit returned, so jbvp is non-NULL here */
+	if (as_text)
+	{
+		if (jbvp->type == jbvString)
+			PG_RETURN_TEXT_P(cstring_to_text_with_len(jbvp->string.val, jbvp->string.len));
+		else if (jbvp->type == jbvNull)
+			PG_RETURN_NULL();
+	}
+
+	res = JsonbValueToJsonb(jbvp);
+
+	if (as_text)
+	{
+		PG_RETURN_TEXT_P(cstring_to_text(JsonbToCString(NULL,
+														VARDATA(res),
+														VARSIZE(res))));
+	}
+	else
+	{
+		/* not text mode - just hand back the jsonb */
+		PG_RETURN_JSONB(res);
+	}
+}
+
 /*
  * SQL function json_array_length(json) -> int
  */
 Datum
 json_array_length(PG_FUNCTION_ARGS)
 {
-	text	   *json = PG_GETARG_TEXT_P(0);
+	text	   *json;
 
 	AlenState  *state;
-	JsonLexContext *lex = makeJsonLexContext(json, false);
+	JsonLexContext *lex;
 	JsonSemAction *sem;
 
+	json = PG_GETARG_TEXT_P(0);
+	lex = makeJsonLexContext(json, false);
 	state = palloc0(sizeof(AlenState));
 	sem = palloc0(sizeof(JsonSemAction));
 
@@ -853,6 +1280,23 @@ json_array_length(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(state->count);
 }
 
+Datum
+jsonb_array_length(PG_FUNCTION_ARGS)
+{
+	Jsonb	   *jb = PG_GETARG_JSONB(0);
+
+	if (JB_ROOT_IS_SCALAR(jb))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot get array length of a scalar")));
+	else if (!JB_ROOT_IS_ARRAY(jb))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot get array length of a non-array")));
+
+
PG_RETURN_INT32(JB_ROOT_COUNT(jb)); +} + /* * These next two check ensure that the json is an array (since it can't be * a scalar or an object). @@ -909,22 +1353,177 @@ json_each(PG_FUNCTION_ARGS) } Datum +jsonb_each(PG_FUNCTION_ARGS) +{ + return each_worker_jsonb(fcinfo, false); +} + +Datum json_each_text(PG_FUNCTION_ARGS) { return each_worker(fcinfo, true); } +Datum +jsonb_each_text(PG_FUNCTION_ARGS) +{ + return each_worker_jsonb(fcinfo, true); +} + static inline Datum -each_worker(PG_FUNCTION_ARGS, bool as_text) +each_worker_jsonb(FunctionCallInfo fcinfo, bool as_text) { - text *json = PG_GETARG_TEXT_P(0); - JsonLexContext *lex = makeJsonLexContext(json, true); + Jsonb *jb = PG_GETARG_JSONB(0); + ReturnSetInfo *rsi; + Tuplestorestate *tuple_store; + TupleDesc tupdesc; + TupleDesc ret_tdesc; + MemoryContext old_cxt, + tmp_cxt; + bool skipNested = false; + JsonbIterator *it; + JsonbValue v; + int r; + + if (!JB_ROOT_IS_OBJECT(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_each%s on a non-object", + as_text ? 
"_text" : ""))); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + if (!rsi || !IsA(rsi, ReturnSetInfo) || + (rsi->allowedModes & SFRM_Materialize) == 0 || + rsi->expectedDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that " + "cannot accept a set"))); + + + rsi->returnMode = SFRM_Materialize; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + + ret_tdesc = CreateTupleDescCopy(tupdesc); + BlessTupleDesc(ret_tdesc); + tuple_store = + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(old_cxt); + + tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "jsonb_each temporary cxt", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + + it = JsonbIteratorInit(VARDATA_ANY(jb)); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_KEY) + { + text *key; + HeapTuple tuple; + Datum values[2]; + bool nulls[2] = {false, false}; + + /* Use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(tmp_cxt); + + key = cstring_to_text_with_len(v.string.val, v.string.len); + + /* + * The next thing the iterator fetches should be the value, no + * matter what shape it is. 
+ */ + r = JsonbIteratorNext(&it, &v, skipNested); + + values[0] = PointerGetDatum(key); + + if (as_text) + { + if (v.type == jbvNull) + { + /* a json null is an sql null in text mode */ + nulls[1] = true; + values[1] = (Datum) NULL; + } + else + { + text *sv; + + if (v.type == jbvString) + { + /* In text mode, scalar strings should be dequoted */ + sv = cstring_to_text_with_len(v.string.val, v.string.len); + } + else + { + /* Turn anything else into a json string */ + StringInfo jtext = makeStringInfo(); + Jsonb *jb = JsonbValueToJsonb(&v); + + (void) JsonbToCString(jtext, VARDATA(jb), 2 * v.estSize); + sv = cstring_to_text_with_len(jtext->data, jtext->len); + } + + values[1] = PointerGetDatum(sv); + } + } + else + { + /* Not in text mode, just return the Jsonb */ + Jsonb *val = JsonbValueToJsonb(&v); + + values[1] = PointerGetDatum(val); + } + + tuple = heap_form_tuple(ret_tdesc, values, nulls); + + tuplestore_puttuple(tuple_store, tuple); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(tmp_cxt); + } + } + + MemoryContextDelete(tmp_cxt); + + rsi->setResult = tuple_store; + rsi->setDesc = ret_tdesc; + + PG_RETURN_NULL(); +} + + +static inline Datum +each_worker(FunctionCallInfo fcinfo, bool as_text) +{ + text *json; + JsonLexContext *lex; JsonSemAction *sem; ReturnSetInfo *rsi; MemoryContext old_cxt; TupleDesc tupdesc; EachState *state; + json = PG_GETARG_TEXT_P(0); + + lex = makeJsonLexContext(json, true); state = palloc0(sizeof(EachState)); sem = palloc0(sizeof(JsonSemAction)); @@ -941,11 +1540,7 @@ each_worker(PG_FUNCTION_ARGS, bool as_text) rsi->returnMode = SFRM_Materialize; - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); + (void) get_call_result_type(fcinfo, NULL, &tupdesc); /* make these in a sufficiently long-lived memory context */ 
old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); @@ -1087,6 +1682,146 @@ each_scalar(void *state, char *token, JsonTokenType tokentype) * * a lot of this processing is similar to the json_each* functions */ + +Datum +jsonb_array_elements(PG_FUNCTION_ARGS) +{ + return elements_worker_jsonb(fcinfo, false); +} + +Datum +jsonb_array_elements_text(PG_FUNCTION_ARGS) +{ + return elements_worker_jsonb(fcinfo, true); +} + +static inline Datum +elements_worker_jsonb(FunctionCallInfo fcinfo, bool as_text) +{ + Jsonb *jb = PG_GETARG_JSONB(0); + ReturnSetInfo *rsi; + Tuplestorestate *tuple_store; + TupleDesc tupdesc; + TupleDesc ret_tdesc; + MemoryContext old_cxt, + tmp_cxt; + bool skipNested = false; + JsonbIterator *it; + JsonbValue v; + int r; + + if (JB_ROOT_IS_SCALAR(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract elements from a scalar"))); + else if (!JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot extract elements from an object"))); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + + if (!rsi || !IsA(rsi, ReturnSetInfo) || + (rsi->allowedModes & SFRM_Materialize) == 0 || + rsi->expectedDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that " + "cannot accept a set"))); + + + rsi->returnMode = SFRM_Materialize; + + /* it's a simple type, so don't use get_call_result_type() */ + tupdesc = rsi->expectedDesc; + + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + + ret_tdesc = CreateTupleDescCopy(tupdesc); + BlessTupleDesc(ret_tdesc); + tuple_store = + tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(old_cxt); + + tmp_cxt = AllocSetContextCreate(CurrentMemoryContext, + "jsonb_each temporary cxt", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + + it = 
JsonbIteratorInit(VARDATA_ANY(jb)); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_ELEM) + { + HeapTuple tuple; + Datum values[1]; + bool nulls[1] = {false}; + + /* use the tmp context so we can clean up after each tuple is done */ + old_cxt = MemoryContextSwitchTo(tmp_cxt); + + if (!as_text) + { + Jsonb *val = JsonbValueToJsonb(&v); + + values[0] = PointerGetDatum(val); + } + else + { + if (v.type == jbvNull) + { + /* a json null is an sql null in text mode */ + nulls[0] = true; + values[0] = (Datum) NULL; + } + else + { + text *sv; + + if (v.type == jbvString) + { + /* in text mode scalar strings should be dequoted */ + sv = cstring_to_text_with_len(v.string.val, v.string.len); + } + else + { + /* turn anything else into a json string */ + StringInfo jtext = makeStringInfo(); + Jsonb *jb = JsonbValueToJsonb(&v); + + (void) JsonbToCString(jtext, VARDATA(jb), 2 * v.estSize); + sv = cstring_to_text_with_len(jtext->data, jtext->len); + } + + values[0] = PointerGetDatum(sv); + } + } + + tuple = heap_form_tuple(ret_tdesc, values, nulls); + + tuplestore_puttuple(tuple_store, tuple); + + /* clean up and switch back */ + MemoryContextSwitchTo(old_cxt); + MemoryContextReset(tmp_cxt); + } + } + + MemoryContextDelete(tmp_cxt); + + rsi->setResult = tuple_store; + rsi->setDesc = ret_tdesc; + + PG_RETURN_NULL(); +} + Datum json_array_elements(PG_FUNCTION_ARGS) { @@ -1100,7 +1835,7 @@ json_array_elements_text(PG_FUNCTION_ARGS) } static inline Datum -elements_worker(PG_FUNCTION_ARGS, bool as_text) +elements_worker(FunctionCallInfo fcinfo, bool as_text) { text *json = PG_GETARG_TEXT_P(0); @@ -1270,9 +2005,16 @@ elements_scalar(void *state, char *token, JsonTokenType tokentype) * which is in turn partly adapted from record_out. * * The json is decomposed into a hash table, in which each - * field in the record is then looked up by name. + * field in the record is then looked up by name. 
For jsonb + * we fetch the values direct from the object. */ Datum +jsonb_populate_record(PG_FUNCTION_ARGS) +{ + return populate_record_worker(fcinfo, true); +} + +Datum json_populate_record(PG_FUNCTION_ARGS) { return populate_record_worker(fcinfo, true); @@ -1285,11 +2027,14 @@ json_to_record(PG_FUNCTION_ARGS) } static inline Datum -populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) +populate_record_worker(FunctionCallInfo fcinfo, bool have_record_arg) { + Oid argtype; + Oid jtype = get_fn_expr_argtype(fcinfo->flinfo, have_record_arg ? 1 : 0); text *json; + Jsonb *jb = NULL; bool use_json_as_text; - HTAB *json_hash; + HTAB *json_hash = NULL; HeapTupleHeader rec = NULL; Oid tupType = InvalidOid; int32 tupTypmod = -1; @@ -1301,19 +2046,20 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) int i; Datum *values; bool *nulls; - char fname[NAMEDATALEN]; - JsonHashEntry *hashentry; + + Assert(jtype == JSONOID || jtype == JSONBOID); + + use_json_as_text = PG_ARGISNULL(have_record_arg ? 2 : 1) ? false : + PG_GETARG_BOOL(have_record_arg ? 2 : 1); if (have_record_arg) { - Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); - - use_json_as_text = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2); + argtype = get_fn_expr_argtype(fcinfo->flinfo, 0); if (!type_is_rowtype(argtype)) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("first argument of json_populate_record must be a row type"))); + errmsg("first argument of json%s_populate_record must be a row type", jtype == JSONBOID ? "b" : ""))); if (PG_ARGISNULL(0)) { @@ -1340,19 +2086,16 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) tupTypmod = HeapTupleHeaderGetTypMod(rec); } - json = PG_GETARG_TEXT_P(1); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); } else - { - /* json_to_record case */ + { /* json{b}_to_record case */ use_json_as_text = PG_ARGISNULL(1) ? 
false : PG_GETARG_BOOL(1); if (PG_ARGISNULL(0)) PG_RETURN_NULL(); - json = PG_GETARG_TEXT_P(0); - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -1362,11 +2105,13 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) "using a column definition list."))); } - json_hash = get_json_object_as_hash(json, "json_populate_record", - use_json_as_text); - - if (have_record_arg) + if (jtype == JSONOID) { + /* just get the text */ + json = PG_GETARG_TEXT_P(have_record_arg ? 1 : 0); + + json_hash = get_json_object_as_hash(json, "json_populate_record", use_json_as_text); + /* * if the input json is empty, we can only skip the rest if we were * passed in a non-null record, since otherwise there may be issues @@ -1375,8 +2120,14 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) if (hash_get_num_entries(json_hash) == 0 && rec) PG_RETURN_POINTER(rec); + } + else + { + jb = PG_GETARG_JSONB(have_record_arg ? 
1 : 0); - tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + /* same logic as for json */ + if (!have_record_arg && rec) + PG_RETURN_POINTER(rec); } ncolumns = tupdesc->natts; @@ -1439,7 +2190,9 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) { ColumnIOData *column_info = &my_extra->columns[i]; Oid column_type = tupdesc->attrs[i]->atttypid; - char *value; + JsonbValue *v = NULL; + char fname[NAMEDATALEN]; + JsonHashEntry *hashentry = NULL; /* Ignore dropped columns in datatype */ if (tupdesc->attrs[i]->attisdropped) @@ -1448,9 +2201,20 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) continue; } - memset(fname, 0, NAMEDATALEN); - strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN); - hashentry = hash_search(json_hash, fname, HASH_FIND, NULL); + if (jtype == JSONOID) + { + + memset(fname, 0, NAMEDATALEN); + strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN); + hashentry = hash_search(json_hash, fname, HASH_FIND, NULL); + } + else + { + char *key = NameStr(tupdesc->attrs[i]->attname); + + v = findJsonbValueFromSuperHeaderLen(VARDATA(jb), JB_FOBJECT, key, + strlen(key)); + } /* * we can't just skip here if the key wasn't found since we might have @@ -1460,7 +2224,8 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) * then every field which we don't populate needs to be run through * the input function just in case it's a domain type. 
*/ - if (hashentry == NULL && rec) + if (((jtype == JSONOID && hashentry == NULL) || + (jtype == JSONBOID && v == NULL)) && rec) continue; /* @@ -1475,7 +2240,8 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) fcinfo->flinfo->fn_mcxt); column_info->column_type = column_type; } - if (hashentry == NULL || hashentry->isnull) + if ((jtype == JSONOID && (hashentry == NULL || hashentry->isnull)) || + (jtype == JSONBOID && (v == NULL || v->type == jbvNull))) { /* * need InputFunctionCall to happen even for nulls, so that domain @@ -1488,9 +2254,33 @@ populate_record_worker(PG_FUNCTION_ARGS, bool have_record_arg) } else { - value = hashentry->val; + char *s = NULL; - values[i] = InputFunctionCall(&column_info->proc, value, + if (jtype == JSONOID) + { + /* already done the hard work in the json case */ + s = hashentry->val; + } + else + { + if (v->type == jbvString) + s = pnstrdup(v->string.val, v->string.len); + else if (v->type == jbvBool) + s = pnstrdup((v->boolean) ? "t" : "f", 1); + else if (v->type == jbvNumeric) + s = DatumGetCString(DirectFunctionCall1(numeric_out, + PointerGetDatum(v->numeric))); + else if (!use_json_as_text) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot populate with a nested object unless use_json_as_text is true"))); + else if (v->type == jbvBinary) + s = JsonbToCString(NULL, v->binary.data, v->binary.len); + else + elog(ERROR, "invalid jsonb type"); + } + + values[i] = InputFunctionCall(&column_info->proc, s, column_info->typioparam, tupdesc->attrs[i]->atttypmod); nulls[i] = false; @@ -1656,6 +2446,134 @@ hash_scalar(void *state, char *token, JsonTokenType tokentype) * per object in the array. 
*/ Datum +jsonb_populate_recordset(PG_FUNCTION_ARGS) +{ + return populate_recordset_worker(fcinfo, true); +} + +static void +make_row_from_rec_and_jsonb(Jsonb * element, PopulateRecordsetState *state) +{ + Datum *values; + bool *nulls; + int i; + RecordIOData *my_extra = state->my_extra; + int ncolumns = my_extra->ncolumns; + TupleDesc tupdesc = state->ret_tdesc; + HeapTupleHeader rec = state->rec; + HeapTuple rettuple; + + values = (Datum *) palloc(ncolumns * sizeof(Datum)); + nulls = (bool *) palloc(ncolumns * sizeof(bool)); + + if (state->rec) + { + HeapTupleData tuple; + + /* Build a temporary HeapTuple control structure */ + tuple.t_len = HeapTupleHeaderGetDatumLength(state->rec); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = state->rec; + + /* Break down the tuple into fields */ + heap_deform_tuple(&tuple, tupdesc, values, nulls); + } + else + { + for (i = 0; i < ncolumns; ++i) + { + values[i] = (Datum) 0; + nulls[i] = true; + } + } + + for (i = 0; i < ncolumns; ++i) + { + ColumnIOData *column_info = &my_extra->columns[i]; + Oid column_type = tupdesc->attrs[i]->atttypid; + JsonbValue *v = NULL; + char *key; + + /* Ignore dropped columns in datatype */ + if (tupdesc->attrs[i]->attisdropped) + { + nulls[i] = true; + continue; + } + + key = NameStr(tupdesc->attrs[i]->attname); + + v = findJsonbValueFromSuperHeaderLen(VARDATA(element), JB_FOBJECT, + key, strlen(key)); + + /* + * We can't just skip here if the key wasn't found since we might have + * a domain to deal with. If we were passed in a non-null record + * datum, we assume that the existing values are valid (if they're + * not, then it's not our fault), but if we were passed in a null, + * then every field which we don't populate needs to be run through + * the input function just in case it's a domain type. 
+ */ + if (v == NULL && rec) + continue; + + /* + * Prepare to convert the column value from text + */ + if (column_info->column_type != column_type) + { + getTypeInputInfo(column_type, + &column_info->typiofunc, + &column_info->typioparam); + fmgr_info_cxt(column_info->typiofunc, &column_info->proc, + state->fn_mcxt); + column_info->column_type = column_type; + } + if (v == NULL || v->type == jbvNull) + { + /* + * Need InputFunctionCall to happen even for nulls, so that domain + * checks are done + */ + values[i] = InputFunctionCall(&column_info->proc, NULL, + column_info->typioparam, + tupdesc->attrs[i]->atttypmod); + nulls[i] = true; + } + else + { + char *s = NULL; + + if (v->type == jbvString) + s = pnstrdup(v->string.val, v->string.len); + else if (v->type == jbvBool) + s = pnstrdup((v->boolean) ? "t" : "f", 1); + else if (v->type == jbvNumeric) + s = DatumGetCString(DirectFunctionCall1(numeric_out, + PointerGetDatum(v->numeric))); + else if (!state->use_json_as_text) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot populate with a nested object unless use_json_as_text is true"))); + else if (v->type == jbvBinary) + s = JsonbToCString(NULL, v->binary.data, v->binary.len); + else + elog(ERROR, "invalid jsonb type"); + + values[i] = InputFunctionCall(&column_info->proc, s, + column_info->typioparam, + tupdesc->attrs[i]->atttypmod); + nulls[i] = false; + } + } + + rettuple = heap_form_tuple(tupdesc, values, nulls); + + tuplestore_puttuple(state->tuple_store, rettuple); +} + +Datum json_populate_recordset(PG_FUNCTION_ARGS) { return populate_recordset_worker(fcinfo, true); @@ -1671,10 +2589,10 @@ json_to_recordset(PG_FUNCTION_ARGS) * common worker for json_populate_recordset() and json_to_recordset() */ static inline Datum -populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg) +populate_recordset_worker(FunctionCallInfo fcinfo, bool have_record_arg) { Oid argtype; - text *json; + Oid jtype = 
get_fn_expr_argtype(fcinfo->flinfo, have_record_arg ? 1 : 0); bool use_json_as_text; ReturnSetInfo *rsi; MemoryContext old_cxt; @@ -1684,8 +2602,6 @@ populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg) TupleDesc tupdesc; RecordIOData *my_extra; int ncolumns; - JsonLexContext *lex; - JsonSemAction *sem; PopulateRecordsetState *state; if (have_record_arg) @@ -1721,7 +2637,8 @@ populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg) /* * get the tupdesc from the result set info - it must be a record type - * because we already checked that arg1 is a record type. + * because we already checked that arg1 is a record type, or we're in a + * to_record function which returns a setof record. */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, @@ -1729,29 +2646,12 @@ populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg) errmsg("function returning record called in context " "that cannot accept type record"))); - state = palloc0(sizeof(PopulateRecordsetState)); - sem = palloc0(sizeof(JsonSemAction)); - - - /* make these in a sufficiently long-lived memory context */ - old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); - - state->ret_tdesc = CreateTupleDescCopy(tupdesc); - BlessTupleDesc(state->ret_tdesc); - state->tuple_store = - tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, - false, work_mem); - - MemoryContextSwitchTo(old_cxt); - /* if the json is null send back an empty set */ if (have_record_arg) { if (PG_ARGISNULL(1)) PG_RETURN_NULL(); - json = PG_GETARG_TEXT_P(1); - if (PG_ARGISNULL(0)) rec = NULL; else @@ -1759,11 +2659,9 @@ populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg) } else { - if (PG_ARGISNULL(0)) + if (PG_ARGISNULL(1)) PG_RETURN_NULL(); - json = PG_GETARG_TEXT_P(0); - rec = NULL; } @@ -1771,8 +2669,6 @@ populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg) tupTypmod = tupdesc->tdtypmod; ncolumns = tupdesc->natts; - lex = 
makeJsonLexContext(json, true); - /* * We arrange to look up the needed I/O info just once per series of * calls, assuming the record type doesn't change underneath us. @@ -1801,23 +2697,80 @@ populate_recordset_worker(PG_FUNCTION_ARGS, bool have_record_arg) my_extra->ncolumns = ncolumns; } - sem->semstate = (void *) state; - sem->array_start = populate_recordset_array_start; - sem->array_element_start = populate_recordset_array_element_start; - sem->scalar = populate_recordset_scalar; - sem->object_field_start = populate_recordset_object_field_start; - sem->object_field_end = populate_recordset_object_field_end; - sem->object_start = populate_recordset_object_start; - sem->object_end = populate_recordset_object_end; + state = palloc0(sizeof(PopulateRecordsetState)); - state->lex = lex; + /* make these in a sufficiently long-lived memory context */ + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + state->ret_tdesc = CreateTupleDescCopy(tupdesc);; + BlessTupleDesc(state->ret_tdesc); + state->tuple_store = tuplestore_begin_heap(rsi->allowedModes & + SFRM_Materialize_Random, + false, work_mem); + MemoryContextSwitchTo(old_cxt); state->my_extra = my_extra; state->rec = rec; state->use_json_as_text = use_json_as_text; state->fn_mcxt = fcinfo->flinfo->fn_mcxt; - pg_parse_json(lex, sem); + if (jtype == JSONOID) + { + text *json = PG_GETARG_TEXT_P(have_record_arg ? 
1 : 0); + JsonLexContext *lex; + JsonSemAction *sem; + + sem = palloc0(sizeof(JsonSemAction)); + + lex = makeJsonLexContext(json, true); + + sem->semstate = (void *) state; + sem->array_start = populate_recordset_array_start; + sem->array_element_start = populate_recordset_array_element_start; + sem->scalar = populate_recordset_scalar; + sem->object_field_start = populate_recordset_object_field_start; + sem->object_field_end = populate_recordset_object_field_end; + sem->object_start = populate_recordset_object_start; + sem->object_end = populate_recordset_object_end; + + state->lex = lex; + + pg_parse_json(lex, sem); + + } + else + { + Jsonb *jb; + JsonbIterator *it; + JsonbValue v; + bool skipNested = false; + int r; + + Assert(jtype == JSONBOID); + jb = PG_GETARG_JSONB(have_record_arg ? 1 : 0); + + if (JB_ROOT_IS_SCALAR(jb) || !JB_ROOT_IS_ARRAY(jb)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot call jsonb_populate_recordset on non-array"))); + + it = JsonbIteratorInit(VARDATA_ANY(jb)); + + while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) + { + skipNested = true; + + if (r == WJB_ELEM) + { + Jsonb *element = JsonbValueToJsonb(&v); + + if (!JB_ROOT_IS_OBJECT(element)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("jsonb_populate_recordset argument must be an array of objects"))); + make_row_from_rec_and_jsonb(element, state); + } + } + } rsi->setResult = state->tuple_store; rsi->setDesc = state->ret_tdesc; @@ -2067,3 +3020,19 @@ populate_recordset_object_field_end(void *state, char *fname, bool isnull) hashentry->val = _state->saved_scalar; } } + +/* + * findJsonbValueFromSuperHeader() wrapper that sets up JsonbValue key string. 
+ */
+static JsonbValue *
+findJsonbValueFromSuperHeaderLen(JsonbSuperHeader sheader, uint32 flags,
+								 char *key, uint32 keylen)
+{
+	JsonbValue	k;
+
+	k.type = jbvString;
+	k.string.val = key;			/* caller's buffer is referenced, not copied */
+	k.string.len = keylen;
+
+	return findJsonbValueFromSuperHeader(sheader, flags, NULL, &k);
+}
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index b78451dda01..64eb0f8d16e 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -627,6 +627,60 @@ numeric_out_sci(Numeric num, int scale)
 }
 
 /*
+ * numeric_normalize() -
+ *
+ *	Output function for numeric data type, with insignificant trailing
+ *	zeroes after the decimal point removed.
+ *
+ *	Only fractional zeroes are stripped, along with the decimal point
+ *	itself if no fractional digits remain: "1.50" is trimmed to "1.5" and
+ *	"1.00" to "1", while a string without a decimal point, such as "1200",
+ *	is returned unchanged.
+ *
+ *	Returns the string produced by get_str_from_var(), trimmed in place,
+ *	or a pstrdup'd "NaN" if num is NaN.
+ */
+char *
+numeric_normalize(Numeric num)
+{
+	NumericVar	x;
+	char	   *str;
+
+	/*
+	 * Handle NaN
+	 */
+	if (NUMERIC_IS_NAN(num))
+		return pstrdup("NaN");
+
+	init_var_from_num(num, &x);
+
+	str = get_str_from_var(&x);
+
+	/* Without a decimal point there are no fractional digits to trim. */
+	if (strchr(str, '.') != NULL)
+	{
+		char	   *end = str + strlen(str);	/* terminating NUL */
+
+		/*
+		 * Back up over trailing fractional zeroes.  The decimal point is
+		 * not '0', so the scan stops there at the latest and cannot run
+		 * off the front of the string.
+		 */
+		while (end[-1] == '0')
+			end--;
+
+		/* Drop the decimal point too if nothing is left behind it. */
+		if (end[-1] == '.')
+			end--;
+
+		/* Truncate at the first character we backed up over. */
+		*end = '\0';
+	}
+
+	return str;
+}
+
+/*
  * numeric_recv - converts external binary format to numeric
  *
  * External format is a sequence of int16's: |