aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/json.c
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2023-03-29 12:11:36 +0200
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2023-03-29 12:11:36 +0200
commit7081ac46ace8c459966174400b53418683c9fe5c (patch)
tree52590ea33eb07a2d7acf4a1461101932c3c88757 /src/backend/utils/adt/json.c
parent38b7437b9088b4859e4489a1a1a9ab7066f5b320 (diff)
downloadpostgresql-7081ac46ace8c459966174400b53418683c9fe5c.tar.gz
postgresql-7081ac46ace8c459966174400b53418683c9fe5c.zip
SQL/JSON: add standard JSON constructor functions
This commit introduces the SQL/JSON standard-conforming constructors for JSON types: JSON_ARRAY() JSON_ARRAYAGG() JSON_OBJECT() JSON_OBJECTAGG() Most of the functionality was already present in PostgreSQL-specific functions, but these include some new functionality such as the ability to skip or include NULL values, and to allow duplicate keys or throw error when they are found, as well as the standard specified syntax to specify output type and format. Author: Nikita Glukhov <n.gluhov@postgrespro.ru> Author: Teodor Sigaev <teodor@sigaev.ru> Author: Oleg Bartunov <obartunov@gmail.com> Author: Alexander Korotkov <aekorotkov@gmail.com> Author: Amit Langote <amitlangote09@gmail.com> Reviewers have included (in no particular order) Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zihong Yu, Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby. Discussion: https://postgr.es/m/CAF4Au4w2x-5LTnN_bxky-mq4=WOqsGsxSpENCzHRAzSnEd8+WQ@mail.gmail.com Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru Discussion: https://postgr.es/m/20220616233130.rparivafipt6doj3@alap3.anarazel.de Discussion: https://postgr.es/m/abd9b83b-aa66-f230-3d6d-734817f0995d%40postgresql.org
Diffstat (limited to 'src/backend/utils/adt/json.c')
-rw-r--r--src/backend/utils/adt/json.c434
1 files changed, 386 insertions, 48 deletions
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 06d8bea9cde..dcd2bb2234a 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -13,7 +13,9 @@
*/
#include "postgres.h"
+#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
+#include "common/hashfn.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
@@ -42,6 +44,34 @@ typedef enum /* type categories for datum_to_json */
JSONTYPE_OTHER /* all else */
} JsonTypeCategory;
+
+/*
+ * Support for fast key uniqueness checking.
+ *
+ * We maintain a hash table of used keys in JSON objects for fast detection
+ * of duplicates.
+ */
+/* Common context for key uniqueness check */
+typedef struct HTAB *JsonUniqueCheckState; /* hash table for key names */
+
+/* Hash entry for JsonUniqueCheckState */
+typedef struct JsonUniqueHashEntry
+{
+ const char *key;
+ int key_len;
+ int object_id;
+} JsonUniqueHashEntry;
+
+/* Context struct for key uniqueness check during JSON building */
+typedef struct JsonUniqueBuilderState
+{
+ JsonUniqueCheckState check; /* unique check */
+ StringInfoData skipped_keys; /* skipped keys with NULL values */
+ MemoryContext mcxt; /* context for saving skipped keys */
+} JsonUniqueBuilderState;
+
+
+/* State struct for JSON aggregation */
typedef struct JsonAggState
{
StringInfo str;
@@ -49,6 +79,7 @@ typedef struct JsonAggState
Oid key_output_func;
JsonTypeCategory val_category;
Oid val_output_func;
+ JsonUniqueBuilderState unique_check;
} JsonAggState;
static void composite_to_json(Datum composite, StringInfo result,
@@ -724,6 +755,48 @@ row_to_json_pretty(PG_FUNCTION_ARGS)
}
/*
+ * Is the given type immutable when coming out of a JSON context?
+ *
+ * At present, datetimes are all considered mutable, because they
+ * depend on timezone. XXX we should also drill down into objects
+ * and arrays, but do not.
+ */
+bool
+to_json_is_immutable(Oid typoid)
+{
+ JsonTypeCategory tcategory;
+ Oid outfuncoid;
+
+ json_categorize_type(typoid, &tcategory, &outfuncoid);
+
+ switch (tcategory)
+ {
+ case JSONTYPE_BOOL:
+ case JSONTYPE_JSON:
+ case JSONTYPE_NULL:
+ return true;
+
+ case JSONTYPE_DATE:
+ case JSONTYPE_TIMESTAMP:
+ case JSONTYPE_TIMESTAMPTZ:
+ return false;
+
+ case JSONTYPE_ARRAY:
+ return false; /* TODO recurse into elements */
+
+ case JSONTYPE_COMPOSITE:
+ return false; /* TODO recurse into fields */
+
+ case JSONTYPE_NUMERIC:
+ case JSONTYPE_CAST:
+ case JSONTYPE_OTHER:
+ return func_volatile(outfuncoid) == PROVOLATILE_IMMUTABLE;
+ }
+
+ return false; /* not reached */
+}
+
+/*
* SQL function to_json(anyvalue)
*/
Datum
@@ -755,8 +828,8 @@ to_json(PG_FUNCTION_ARGS)
*
* aggregate input column as a json array value.
*/
-Datum
-json_agg_transfn(PG_FUNCTION_ARGS)
+static Datum
+json_agg_transfn_worker(FunctionCallInfo fcinfo, bool absent_on_null)
{
MemoryContext aggcontext,
oldcontext;
@@ -796,9 +869,14 @@ json_agg_transfn(PG_FUNCTION_ARGS)
else
{
state = (JsonAggState *) PG_GETARG_POINTER(0);
- appendStringInfoString(state->str, ", ");
}
+ if (absent_on_null && PG_ARGISNULL(1))
+ PG_RETURN_POINTER(state);
+
+ if (state->str->len > 1)
+ appendStringInfoString(state->str, ", ");
+
/* fast path for NULLs */
if (PG_ARGISNULL(1))
{
@@ -810,7 +888,7 @@ json_agg_transfn(PG_FUNCTION_ARGS)
val = PG_GETARG_DATUM(1);
/* add some whitespace if structured type and not first item */
- if (!PG_ARGISNULL(0) &&
+ if (!PG_ARGISNULL(0) && state->str->len > 1 &&
(state->val_category == JSONTYPE_ARRAY ||
state->val_category == JSONTYPE_COMPOSITE))
{
@@ -828,6 +906,25 @@ json_agg_transfn(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(state);
}
+
+/*
+ * json_agg aggregate function
+ */
+Datum
+json_agg_transfn(PG_FUNCTION_ARGS)
+{
+ return json_agg_transfn_worker(fcinfo, false);
+}
+
+/*
+ * json_agg_strict aggregate function
+ */
+Datum
+json_agg_strict_transfn(PG_FUNCTION_ARGS)
+{
+ return json_agg_transfn_worker(fcinfo, true);
+}
+
/*
* json_agg final function
*/
@@ -851,18 +948,120 @@ json_agg_finalfn(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
}
+/* Functions implementing hash table for key uniqueness check */
+static uint32
+json_unique_hash(const void *key, Size keysize)
+{
+ const JsonUniqueHashEntry *entry = (JsonUniqueHashEntry *) key;
+ uint32 hash = hash_bytes_uint32(entry->object_id);
+
+ hash ^= hash_bytes((const unsigned char *) entry->key, entry->key_len);
+
+ return DatumGetUInt32(hash);
+}
+
+static int
+json_unique_hash_match(const void *key1, const void *key2, Size keysize)
+{
+ const JsonUniqueHashEntry *entry1 = (const JsonUniqueHashEntry *) key1;
+ const JsonUniqueHashEntry *entry2 = (const JsonUniqueHashEntry *) key2;
+
+ if (entry1->object_id != entry2->object_id)
+ return entry1->object_id > entry2->object_id ? 1 : -1;
+
+ if (entry1->key_len != entry2->key_len)
+ return entry1->key_len > entry2->key_len ? 1 : -1;
+
+ return strncmp(entry1->key, entry2->key, entry1->key_len);
+}
+
+/*
+ * Uniqueness detection support.
+ *
+ * In order to detect uniqueness during building or parsing of a JSON
+ * object, we maintain a hash table of key names already seen.
+ */
+static void
+json_unique_check_init(JsonUniqueCheckState *cxt)
+{
+ HASHCTL ctl;
+
+ memset(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(JsonUniqueHashEntry);
+ ctl.entrysize = sizeof(JsonUniqueHashEntry);
+ ctl.hcxt = CurrentMemoryContext;
+ ctl.hash = json_unique_hash;
+ ctl.match = json_unique_hash_match;
+
+ *cxt = hash_create("json object hashtable",
+ 32,
+ &ctl,
+ HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
+}
+
+static void
+json_unique_builder_init(JsonUniqueBuilderState *cxt)
+{
+ json_unique_check_init(&cxt->check);
+ cxt->mcxt = CurrentMemoryContext;
+ cxt->skipped_keys.data = NULL;
+}
+
+static bool
+json_unique_check_key(JsonUniqueCheckState *cxt, const char *key, int object_id)
+{
+ JsonUniqueHashEntry entry;
+ bool found;
+
+ entry.key = key;
+ entry.key_len = strlen(key);
+ entry.object_id = object_id;
+
+ (void) hash_search(*cxt, &entry, HASH_ENTER, &found);
+
+ return !found;
+}
+
+/*
+ * On-demand initialization of a throwaway StringInfo. This is used to
+ * read a key name that we don't need to store in the output object, for
+ * duplicate key detection when the value is NULL.
+ */
+static StringInfo
+json_unique_builder_get_throwawaybuf(JsonUniqueBuilderState *cxt)
+{
+ StringInfo out = &cxt->skipped_keys;
+
+ if (!out->data)
+ {
+ MemoryContext oldcxt = MemoryContextSwitchTo(cxt->mcxt);
+
+ initStringInfo(out);
+ MemoryContextSwitchTo(oldcxt);
+ }
+ else
+ /* Just reset the string to empty */
+ out->len = 0;
+
+ return out;
+}
+
/*
* json_object_agg transition function.
*
* aggregate two input columns as a single json object value.
*/
-Datum
-json_object_agg_transfn(PG_FUNCTION_ARGS)
+static Datum
+json_object_agg_transfn_worker(FunctionCallInfo fcinfo,
+ bool absent_on_null, bool unique_keys)
{
MemoryContext aggcontext,
oldcontext;
JsonAggState *state;
+ StringInfo out;
Datum arg;
+ bool skip;
+ int key_offset;
if (!AggCheckCallContext(fcinfo, &aggcontext))
{
@@ -877,12 +1076,16 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
/*
* Make the StringInfo in a context where it will persist for the
* duration of the aggregate call. Switching context is only needed
- * for this initial step, as the StringInfo routines make sure they
- * use the right context to enlarge the object if necessary.
+ * for this initial step, as the StringInfo and dynahash routines make
+ * sure they use the right context to enlarge the object if necessary.
*/
oldcontext = MemoryContextSwitchTo(aggcontext);
state = (JsonAggState *) palloc(sizeof(JsonAggState));
state->str = makeStringInfo();
+ if (unique_keys)
+ json_unique_builder_init(&state->unique_check);
+ else
+ memset(&state->unique_check, 0, sizeof(state->unique_check));
MemoryContextSwitchTo(oldcontext);
arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
@@ -910,7 +1113,6 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
else
{
state = (JsonAggState *) PG_GETARG_POINTER(0);
- appendStringInfoString(state->str, ", ");
}
/*
@@ -923,14 +1125,56 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
if (PG_ARGISNULL(1))
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("field name must not be null")));
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null value not allowed for object key")));
+
+ /* Skip null values if absent_on_null */
+ skip = absent_on_null && PG_ARGISNULL(2);
+
+ if (skip)
+ {
+ /*
+ * We got a NULL value and we're not storing those; if we're not
+ * testing key uniqueness, we're done. If we are, use the throwaway
+ * buffer to store the key name so that we can check it.
+ */
+ if (!unique_keys)
+ PG_RETURN_POINTER(state);
+
+ out = json_unique_builder_get_throwawaybuf(&state->unique_check);
+ }
+ else
+ {
+ out = state->str;
+
+ /*
+ * Append comma delimiter only if we have already output some fields
+ * after the initial string "{ ".
+ */
+ if (out->len > 2)
+ appendStringInfoString(out, ", ");
+ }
arg = PG_GETARG_DATUM(1);
- datum_to_json(arg, false, state->str, state->key_category,
+ key_offset = out->len;
+
+ datum_to_json(arg, false, out, state->key_category,
state->key_output_func, true);
+ if (unique_keys)
+ {
+ const char *key = &out->data[key_offset];
+
+ if (!json_unique_check_key(&state->unique_check.check, key, 0))
+ ereport(ERROR,
+ errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
+ errmsg("duplicate JSON key %s", key));
+
+ if (skip)
+ PG_RETURN_POINTER(state);
+ }
+
appendStringInfoString(state->str, " : ");
if (PG_ARGISNULL(2))
@@ -945,6 +1189,42 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
}
/*
+ * json_object_agg aggregate function
+ */
+Datum
+json_object_agg_transfn(PG_FUNCTION_ARGS)
+{
+ return json_object_agg_transfn_worker(fcinfo, false, false);
+}
+
+/*
+ * json_object_agg_strict aggregate function
+ */
+Datum
+json_object_agg_strict_transfn(PG_FUNCTION_ARGS)
+{
+ return json_object_agg_transfn_worker(fcinfo, true, false);
+}
+
+/*
+ * json_object_agg_unique aggregate function
+ */
+Datum
+json_object_agg_unique_transfn(PG_FUNCTION_ARGS)
+{
+ return json_object_agg_transfn_worker(fcinfo, false, true);
+}
+
+/*
+ * json_object_agg_unique_strict aggregate function
+ */
+Datum
+json_object_agg_unique_strict_transfn(PG_FUNCTION_ARGS)
+{
+ return json_object_agg_transfn_worker(fcinfo, true, true);
+}
+
+/*
* json_object_agg final function.
*/
Datum
@@ -985,25 +1265,14 @@ catenate_stringinfo_string(StringInfo buffer, const char *addon)
return result;
}
-/*
- * SQL function json_build_object(variadic "any")
- */
Datum
-json_build_object(PG_FUNCTION_ARGS)
+json_build_object_worker(int nargs, Datum *args, bool *nulls, Oid *types,
+ bool absent_on_null, bool unique_keys)
{
- int nargs;
int i;
const char *sep = "";
StringInfo result;
- Datum *args;
- bool *nulls;
- Oid *types;
-
- /* fetch argument values to build the object */
- nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
-
- if (nargs < 0)
- PG_RETURN_NULL();
+ JsonUniqueBuilderState unique_check;
if (nargs % 2 != 0)
ereport(ERROR,
@@ -1017,19 +1286,57 @@ json_build_object(PG_FUNCTION_ARGS)
appendStringInfoChar(result, '{');
+ if (unique_keys)
+ json_unique_builder_init(&unique_check);
+
for (i = 0; i < nargs; i += 2)
{
- appendStringInfoString(result, sep);
- sep = ", ";
+ StringInfo out;
+ bool skip;
+ int key_offset;
+
+ /* Skip null values if absent_on_null */
+ skip = absent_on_null && nulls[i + 1];
+
+ if (skip)
+ {
+ /* If key uniqueness check is needed we must save skipped keys */
+ if (!unique_keys)
+ continue;
+
+ out = json_unique_builder_get_throwawaybuf(&unique_check);
+ }
+ else
+ {
+ appendStringInfoString(result, sep);
+ sep = ", ";
+ out = result;
+ }
/* process key */
if (nulls[i])
ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("argument %d cannot be null", i + 1),
- errhint("Object keys should be text.")));
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null value not allowed for object key")));
+
+ /* save key offset before appending it */
+ key_offset = out->len;
- add_json(args[i], false, result, types[i], true);
+ add_json(args[i], false, out, types[i], true);
+
+ if (unique_keys)
+ {
+ /* check key uniqueness after key appending */
+ const char *key = &out->data[key_offset];
+
+ if (!json_unique_check_key(&unique_check.check, key, 0))
+ ereport(ERROR,
+ errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
+ errmsg("duplicate JSON key %s", key));
+
+ if (skip)
+ continue;
+ }
appendStringInfoString(result, " : ");
@@ -1039,7 +1346,27 @@ json_build_object(PG_FUNCTION_ARGS)
appendStringInfoChar(result, '}');
- PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+ return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
+}
+
+/*
+ * SQL function json_build_object(variadic "any")
+ */
+Datum
+json_build_object(PG_FUNCTION_ARGS)
+{
+ Datum *args;
+ bool *nulls;
+ Oid *types;
+
+ /* fetch argument values to build the object */
+ int nargs = extract_variadic_args(fcinfo, 0, true,
+ &args, &types, &nulls);
+
+ if (nargs < 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(json_build_object_worker(nargs, args, nulls, types, false, false));
}
/*
@@ -1051,25 +1378,13 @@ json_build_object_noargs(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
}
-/*
- * SQL function json_build_array(variadic "any")
- */
Datum
-json_build_array(PG_FUNCTION_ARGS)
+json_build_array_worker(int nargs, Datum *args, bool *nulls, Oid *types,
+ bool absent_on_null)
{
- int nargs;
int i;
const char *sep = "";
StringInfo result;
- Datum *args;
- bool *nulls;
- Oid *types;
-
- /* fetch argument values to build the array */
- nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
-
- if (nargs < 0)
- PG_RETURN_NULL();
result = makeStringInfo();
@@ -1077,6 +1392,9 @@ json_build_array(PG_FUNCTION_ARGS)
for (i = 0; i < nargs; i++)
{
+ if (absent_on_null && nulls[i])
+ continue;
+
appendStringInfoString(result, sep);
sep = ", ";
add_json(args[i], nulls[i], result, types[i], false);
@@ -1084,7 +1402,27 @@ json_build_array(PG_FUNCTION_ARGS)
appendStringInfoChar(result, ']');
- PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+ return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
+}
+
+/*
+ * SQL function json_build_array(variadic "any")
+ */
+Datum
+json_build_array(PG_FUNCTION_ARGS)
+{
+ Datum *args;
+ bool *nulls;
+ Oid *types;
+
+ /* fetch argument values to build the array */
+ int nargs = extract_variadic_args(fcinfo, 0, true,
+ &args, &types, &nulls);
+
+ if (nargs < 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(json_build_array_worker(nargs, args, nulls, types, false));
}
/*