diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/.gitignore | 3 | ||||
-rw-r--r-- | src/backend/utils/adt/Makefile | 19 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonb.c | 91 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonb_util.c | 8 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonpath.c | 1053 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonpath_exec.c | 2292 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonpath_gram.y | 480 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonpath_scan.l | 638 | ||||
-rw-r--r-- | src/backend/utils/adt/regexp.c | 4 | ||||
-rw-r--r-- | src/backend/utils/errcodes.txt | 15 |
10 files changed, 4559 insertions, 44 deletions
diff --git a/src/backend/utils/adt/.gitignore b/src/backend/utils/adt/.gitignore new file mode 100644 index 00000000000..7fab054407e --- /dev/null +++ b/src/backend/utils/adt/.gitignore @@ -0,0 +1,3 @@ +/jsonpath_gram.h +/jsonpath_gram.c +/jsonpath_scan.c diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 82d10af752a..6b24a9caa14 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -17,8 +17,8 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ float.o format_type.o formatting.o genfile.o \ geo_ops.o geo_selfuncs.o geo_spgist.o inet_cidr_ntop.o inet_net_pton.o \ int.o int8.o json.o jsonb.o jsonb_gin.o jsonb_op.o jsonb_util.o \ - jsonfuncs.o like.o like_support.o lockfuncs.o \ - mac.o mac8.o misc.o name.o \ + jsonfuncs.o jsonpath_gram.o jsonpath_scan.o jsonpath.o jsonpath_exec.o \ + like.o like_support.o lockfuncs.o mac.o mac8.o misc.o name.o \ network.o network_gist.o network_selfuncs.o network_spgist.o \ numeric.o numutils.o oid.o oracle_compat.o \ orderedsetaggs.o partitionfuncs.o pg_locale.o pg_lsn.o \ @@ -33,6 +33,21 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ txid.o uuid.o varbit.o varchar.o varlena.o version.o \ windowfuncs.o xid.o xml.o +jsonpath_gram.c: BISONFLAGS += -d + +jsonpath_scan.c: FLEXFLAGS = -CF -p -p + +jsonpath_gram.h: jsonpath_gram.c ; + +# Force these dependencies to be known even without dependency info built: +jsonpath_gram.o jsonpath_scan.o jsonpath_parser.o: jsonpath_gram.h + +# jsonpath_gram.c, jsonpath_gram.h, and jsonpath_scan.c are in the +# distribution tarball, so they are not cleaned here. 
+clean distclean maintainer-clean: + rm -f lex.backup + + like.o: like.c like_match.c varlena.o: varlena.c levenshtein.c diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c index c02c8569f28..7af4091200b 100644 --- a/src/backend/utils/adt/jsonb.c +++ b/src/backend/utils/adt/jsonb.c @@ -164,6 +164,55 @@ jsonb_send(PG_FUNCTION_ARGS) } /* + * Get the type name of a jsonb container. + */ +static const char * +JsonbContainerTypeName(JsonbContainer *jbc) +{ + JsonbValue scalar; + + if (JsonbExtractScalar(jbc, &scalar)) + return JsonbTypeName(&scalar); + else if (JsonContainerIsArray(jbc)) + return "array"; + else if (JsonContainerIsObject(jbc)) + return "object"; + else + { + elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header); + return "unknown"; + } +} + +/* + * Get the type name of a jsonb value. + */ +const char * +JsonbTypeName(JsonbValue *jbv) +{ + switch (jbv->type) + { + case jbvBinary: + return JsonbContainerTypeName(jbv->val.binary.data); + case jbvObject: + return "object"; + case jbvArray: + return "array"; + case jbvNumeric: + return "number"; + case jbvString: + return "string"; + case jbvBool: + return "boolean"; + case jbvNull: + return "null"; + default: + elog(ERROR, "unrecognized jsonb value type: %d", jbv->type); + return "unknown"; + } +} + +/* * SQL function jsonb_typeof(jsonb) -> text * * This function is here because the analog json function is in json.c, since @@ -173,45 +222,7 @@ Datum jsonb_typeof(PG_FUNCTION_ARGS) { Jsonb *in = PG_GETARG_JSONB_P(0); - JsonbIterator *it; - JsonbValue v; - char *result; - - if (JB_ROOT_IS_OBJECT(in)) - result = "object"; - else if (JB_ROOT_IS_ARRAY(in) && !JB_ROOT_IS_SCALAR(in)) - result = "array"; - else - { - Assert(JB_ROOT_IS_SCALAR(in)); - - it = JsonbIteratorInit(&in->root); - - /* - * A root scalar is stored as an array of one element, so we get the - * array and then its first (and only) member. 
- */ - (void) JsonbIteratorNext(&it, &v, true); - Assert(v.type == jbvArray); - (void) JsonbIteratorNext(&it, &v, true); - switch (v.type) - { - case jbvNull: - result = "null"; - break; - case jbvString: - result = "string"; - break; - case jbvNumeric: - result = "number"; - break; - case jbvBool: - result = "boolean"; - break; - default: - elog(ERROR, "unknown jsonb scalar type"); - } - } + const char *result = JsonbContainerTypeName(&in->root); PG_RETURN_TEXT_P(cstring_to_text(result)); } @@ -1857,7 +1868,7 @@ jsonb_object_agg_finalfn(PG_FUNCTION_ARGS) /* * Extract scalar value from raw-scalar pseudo-array jsonb. */ -static bool +bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res) { JsonbIterator *it; diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c index 84796a11eb7..3b249fe8cb0 100644 --- a/src/backend/utils/adt/jsonb_util.c +++ b/src/backend/utils/adt/jsonb_util.c @@ -1728,6 +1728,14 @@ convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal) break; case jbvNumeric: + /* replace numeric NaN with string "NaN" */ + if (numeric_is_nan(scalarVal->val.numeric)) + { + appendToBuffer(buffer, "NaN", 3); + *jentry = 3; + break; + } + numlen = VARSIZE_ANY(scalarVal->val.numeric); padlen = padBufferToInt(buffer); diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c new file mode 100644 index 00000000000..2ad1318d33e --- /dev/null +++ b/src/backend/utils/adt/jsonpath.c @@ -0,0 +1,1053 @@ +/*------------------------------------------------------------------------- + * + * jsonpath.c + * Input/output and supporting routines for jsonpath + * + * jsonpath expression is a chain of path items. First path item is $, $var, + * literal or arithmetic expression. Subsequent path items are accessors + * (.key, .*, [subscripts], [*]), filters (? (predicate)) and methods (.type(), + * .size() etc). 
+ * + * For instance, structure of path items for simple expression: + * + * $.a[*].type() + * + * is pretty evident: + * + * $ => .a => [*] => .type() + * + * Some path items such as arithmetic operations, predicates or array + * subscripts may comprise subtrees. For instance, more complex expression + * + * ($.a + $[1 to 5, 7] ? (@ > 3).double()).type() + * + * has the following structure of path items: + * + * + => .type() + * ___/ \___ + * / \ + * $ => .a $ => [] => ? => .double() + * _||_ | + * / \ > + * to to / \ + * / \ / @ 3 + * 1 5 7 + * + * Binary encoding of jsonpath constitutes a sequence of 4-byte aligned + * variable-length path items connected by links. Every item has a header + * consisting of item type (enum JsonPathItemType) and offset of next item + * (zero means no next item). After the header, item may have payload + * depending on item type. For instance, payload of '.key' accessor item is + * length of key name and key name itself. Payload of '>' comparison operator + * item is offsets of left and right operands. 
+ * + * So, binary representation of sample expression above is: + * (bottom arrows are next links, top lines are argument links) + * + * _____ + * _____ ___/____ \ __ + * _ /_ \ _____/__/____ \ \ __ _ /_ \ + * / / \ \ / / / \ \ \ / \ / / \ \ + * +(LR) $ .a $ [](* to *, * to *) 1 5 7 ?(A) >(LR) @ 3 .double() .type() + * | | ^ | ^| ^| ^ ^ + * | |__| |__||________________________||___________________| | + * |_______________________________________________________________________| + * + * Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/json.h" +#include "utils/jsonpath.h" + + +static Datum jsonPathFromCstring(char *in, int len); +static char *jsonPathToCstring(StringInfo out, JsonPath *in, + int estimated_len); +static int flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item, + int nestingLevel, bool insideArraySubscript); +static void alignStringInfoInt(StringInfo buf); +static int32 reserveSpaceForItemPointer(StringInfo buf); +static void printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, + bool printBracketes); +static int operationPriority(JsonPathItemType op); + + +/**************************** INPUT/OUTPUT ********************************/ + +/* + * jsonpath type input function + */ +Datum +jsonpath_in(PG_FUNCTION_ARGS) +{ + char *in = PG_GETARG_CSTRING(0); + int len = strlen(in); + + return jsonPathFromCstring(in, len); +} + +/* + * jsonpath type recv function + * + * The type is sent as text in binary mode, so this is almost the same + * as the input function, but it's prefixed with a version number so we + * can change the binary format sent in future if necessary. For now, + * only version 1 is supported. 
+ */ +Datum +jsonpath_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int version = pq_getmsgint(buf, 1); + char *str; + int nbytes; + + if (version == JSONPATH_VERSION) + str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); + else + elog(ERROR, "unsupported jsonpath version number: %d", version); + + return jsonPathFromCstring(str, nbytes); +} + +/* + * jsonpath type output function + */ +Datum +jsonpath_out(PG_FUNCTION_ARGS) +{ + JsonPath *in = PG_GETARG_JSONPATH_P(0); + + PG_RETURN_CSTRING(jsonPathToCstring(NULL, in, VARSIZE(in))); +} + +/* + * jsonpath type send function + * + * Just send jsonpath as a version number, then a string of text + */ +Datum +jsonpath_send(PG_FUNCTION_ARGS) +{ + JsonPath *in = PG_GETARG_JSONPATH_P(0); + StringInfoData buf; + StringInfoData jtext; + int version = JSONPATH_VERSION; + + initStringInfo(&jtext); + (void) jsonPathToCstring(&jtext, in, VARSIZE(in)); + + pq_begintypsend(&buf); + pq_sendint8(&buf, version); + pq_sendtext(&buf, jtext.data, jtext.len); + pfree(jtext.data); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +/* + * Converts C-string to a jsonpath value. + * + * Uses jsonpath parser to turn string into an AST, then + * flattenJsonPathParseItem() does second pass turning AST into binary + * representation of jsonpath. 
+ */ +static Datum +jsonPathFromCstring(char *in, int len) +{ + JsonPathParseResult *jsonpath = parsejsonpath(in, len); + JsonPath *res; + StringInfoData buf; + + initStringInfo(&buf); + enlargeStringInfo(&buf, 4 * len /* estimation */ ); + + appendStringInfoSpaces(&buf, JSONPATH_HDRSZ); + + if (!jsonpath) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for jsonpath: \"%s\"", in))); + + flattenJsonPathParseItem(&buf, jsonpath->expr, 0, false); + + res = (JsonPath *) buf.data; + SET_VARSIZE(res, buf.len); + res->header = JSONPATH_VERSION; + if (jsonpath->lax) + res->header |= JSONPATH_LAX; + + PG_RETURN_JSONPATH_P(res); +} + +/* + * Converts jsonpath value to a C-string. + * + * If 'out' argument is non-null, the resulting C-string is stored inside the + * StringBuffer. The resulting string is always returned. + */ +static char * +jsonPathToCstring(StringInfo out, JsonPath *in, int estimated_len) +{ + StringInfoData buf; + JsonPathItem v; + + if (!out) + { + out = &buf; + initStringInfo(out); + } + enlargeStringInfo(out, estimated_len); + + if (!(in->header & JSONPATH_LAX)) + appendBinaryStringInfo(out, "strict ", 7); + + jspInit(&v, in); + printJsonPathItem(out, &v, false, true); + + return out->data; +} + +/* + * Recursive function converting given jsonpath parse item and all its + * children into a binary representation. + */ +static int +flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item, + int nestingLevel, bool insideArraySubscript) +{ + /* position from begining of jsonpath data */ + int32 pos = buf->len - JSONPATH_HDRSZ; + int32 chld; + int32 next; + int argNestingLevel = 0; + + check_stack_depth(); + CHECK_FOR_INTERRUPTS(); + + appendStringInfoChar(buf, (char) (item->type)); + + /* + * We align buffer to int32 because a series of int32 values often goes + * after the header, and we want to read them directly by dereferencing + * int32 pointer (see jspInitByBuffer()). 
+ */ + alignStringInfoInt(buf); + + /* + * Reserve space for next item pointer. Actual value will be recorded + * later, after next and children items processing. + */ + next = reserveSpaceForItemPointer(buf); + + switch (item->type) + { + case jpiString: + case jpiVariable: + case jpiKey: + appendBinaryStringInfo(buf, (char *) &item->value.string.len, + sizeof(item->value.string.len)); + appendBinaryStringInfo(buf, item->value.string.val, + item->value.string.len); + appendStringInfoChar(buf, '\0'); + break; + case jpiNumeric: + appendBinaryStringInfo(buf, (char *) item->value.numeric, + VARSIZE(item->value.numeric)); + break; + case jpiBool: + appendBinaryStringInfo(buf, (char *) &item->value.boolean, + sizeof(item->value.boolean)); + break; + case jpiAnd: + case jpiOr: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiAdd: + case jpiSub: + case jpiMul: + case jpiDiv: + case jpiMod: + case jpiStartsWith: + { + /* + * First, reserve place for left/right arg's positions, then + * record both args and sets actual position in reserved + * places. + */ + int32 left = reserveSpaceForItemPointer(buf); + int32 right = reserveSpaceForItemPointer(buf); + + chld = !item->value.args.left ? pos : + flattenJsonPathParseItem(buf, item->value.args.left, + nestingLevel + argNestingLevel, + insideArraySubscript); + *(int32 *) (buf->data + left) = chld - pos; + + chld = !item->value.args.right ? 
pos : + flattenJsonPathParseItem(buf, item->value.args.right, + nestingLevel + argNestingLevel, + insideArraySubscript); + *(int32 *) (buf->data + right) = chld - pos; + } + break; + case jpiLikeRegex: + { + int32 offs; + + appendBinaryStringInfo(buf, + (char *) &item->value.like_regex.flags, + sizeof(item->value.like_regex.flags)); + offs = reserveSpaceForItemPointer(buf); + appendBinaryStringInfo(buf, + (char *) &item->value.like_regex.patternlen, + sizeof(item->value.like_regex.patternlen)); + appendBinaryStringInfo(buf, item->value.like_regex.pattern, + item->value.like_regex.patternlen); + appendStringInfoChar(buf, '\0'); + + chld = flattenJsonPathParseItem(buf, item->value.like_regex.expr, + nestingLevel, + insideArraySubscript); + *(int32 *) (buf->data + offs) = chld - pos; + } + break; + case jpiFilter: + argNestingLevel++; + /* fall through */ + case jpiIsUnknown: + case jpiNot: + case jpiPlus: + case jpiMinus: + case jpiExists: + { + int32 arg = reserveSpaceForItemPointer(buf); + + chld = flattenJsonPathParseItem(buf, item->value.arg, + nestingLevel + argNestingLevel, + insideArraySubscript); + *(int32 *) (buf->data + arg) = chld - pos; + } + break; + case jpiNull: + break; + case jpiRoot: + break; + case jpiAnyArray: + case jpiAnyKey: + break; + case jpiCurrent: + if (nestingLevel <= 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("@ is not allowed in root expressions"))); + break; + case jpiLast: + if (!insideArraySubscript) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("LAST is allowed only in array subscripts"))); + break; + case jpiIndexArray: + { + int32 nelems = item->value.array.nelems; + int offset; + int i; + + appendBinaryStringInfo(buf, (char *) &nelems, sizeof(nelems)); + + offset = buf->len; + + appendStringInfoSpaces(buf, sizeof(int32) * 2 * nelems); + + for (i = 0; i < nelems; i++) + { + int32 *ppos; + int32 topos; + int32 frompos = + flattenJsonPathParseItem(buf, + item->value.array.elems[i].from, + 
nestingLevel, true) - pos; + + if (item->value.array.elems[i].to) + topos = flattenJsonPathParseItem(buf, + item->value.array.elems[i].to, + nestingLevel, true) - pos; + else + topos = 0; + + ppos = (int32 *) &buf->data[offset + i * 2 * sizeof(int32)]; + + ppos[0] = frompos; + ppos[1] = topos; + } + } + break; + case jpiAny: + appendBinaryStringInfo(buf, + (char *) &item->value.anybounds.first, + sizeof(item->value.anybounds.first)); + appendBinaryStringInfo(buf, + (char *) &item->value.anybounds.last, + sizeof(item->value.anybounds.last)); + break; + case jpiType: + case jpiSize: + case jpiAbs: + case jpiFloor: + case jpiCeiling: + case jpiDouble: + case jpiKeyValue: + break; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", item->type); + } + + if (item->next) + { + chld = flattenJsonPathParseItem(buf, item->next, nestingLevel, + insideArraySubscript) - pos; + *(int32 *) (buf->data + next) = chld; + } + + return pos; +} + +/* + * Align StringInfo to int by adding zero padding bytes + */ +static void +alignStringInfoInt(StringInfo buf) +{ + switch (INTALIGN(buf->len) - buf->len) + { + case 3: + appendStringInfoCharMacro(buf, 0); + case 2: + appendStringInfoCharMacro(buf, 0); + case 1: + appendStringInfoCharMacro(buf, 0); + default: + break; + } +} + +/* + * Reserve space for int32 JsonPathItem pointer. Now zero pointer is written, + * actual value will be recorded at '(int32 *) &buf->data[pos]' later. + */ +static int32 +reserveSpaceForItemPointer(StringInfo buf) +{ + int32 pos = buf->len; + int32 ptr = 0; + + appendBinaryStringInfo(buf, (char *) &ptr, sizeof(ptr)); + + return pos; +} + +/* + * Prints text representation of given jsonpath item and all its children. 
+ */ +static void +printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey, + bool printBracketes) +{ + JsonPathItem elem; + int i; + + check_stack_depth(); + CHECK_FOR_INTERRUPTS(); + + switch (v->type) + { + case jpiNull: + appendStringInfoString(buf, "null"); + break; + case jpiKey: + if (inKey) + appendStringInfoChar(buf, '.'); + escape_json(buf, jspGetString(v, NULL)); + break; + case jpiString: + escape_json(buf, jspGetString(v, NULL)); + break; + case jpiVariable: + appendStringInfoChar(buf, '$'); + escape_json(buf, jspGetString(v, NULL)); + break; + case jpiNumeric: + appendStringInfoString(buf, + DatumGetCString(DirectFunctionCall1(numeric_out, + PointerGetDatum(jspGetNumeric(v))))); + break; + case jpiBool: + if (jspGetBool(v)) + appendBinaryStringInfo(buf, "true", 4); + else + appendBinaryStringInfo(buf, "false", 5); + break; + case jpiAnd: + case jpiOr: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiAdd: + case jpiSub: + case jpiMul: + case jpiDiv: + case jpiMod: + case jpiStartsWith: + if (printBracketes) + appendStringInfoChar(buf, '('); + jspGetLeftArg(v, &elem); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + appendStringInfoChar(buf, ' '); + appendStringInfoString(buf, jspOperationName(v->type)); + appendStringInfoChar(buf, ' '); + jspGetRightArg(v, &elem); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + if (printBracketes) + appendStringInfoChar(buf, ')'); + break; + case jpiLikeRegex: + if (printBracketes) + appendStringInfoChar(buf, '('); + + jspInitByBuffer(&elem, v->base, v->content.like_regex.expr); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + + appendBinaryStringInfo(buf, " like_regex ", 12); + + escape_json(buf, v->content.like_regex.pattern); + + if 
(v->content.like_regex.flags) + { + appendBinaryStringInfo(buf, " flag \"", 7); + + if (v->content.like_regex.flags & JSP_REGEX_ICASE) + appendStringInfoChar(buf, 'i'); + if (v->content.like_regex.flags & JSP_REGEX_SLINE) + appendStringInfoChar(buf, 's'); + if (v->content.like_regex.flags & JSP_REGEX_MLINE) + appendStringInfoChar(buf, 'm'); + if (v->content.like_regex.flags & JSP_REGEX_WSPACE) + appendStringInfoChar(buf, 'x'); + + appendStringInfoChar(buf, '"'); + } + + if (printBracketes) + appendStringInfoChar(buf, ')'); + break; + case jpiPlus: + case jpiMinus: + if (printBracketes) + appendStringInfoChar(buf, '('); + appendStringInfoChar(buf, v->type == jpiPlus ? '+' : '-'); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, + operationPriority(elem.type) <= + operationPriority(v->type)); + if (printBracketes) + appendStringInfoChar(buf, ')'); + break; + case jpiFilter: + appendBinaryStringInfo(buf, "?(", 2); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendStringInfoChar(buf, ')'); + break; + case jpiNot: + appendBinaryStringInfo(buf, "!(", 2); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendStringInfoChar(buf, ')'); + break; + case jpiIsUnknown: + appendStringInfoChar(buf, '('); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendBinaryStringInfo(buf, ") is unknown", 12); + break; + case jpiExists: + appendBinaryStringInfo(buf, "exists (", 8); + jspGetArg(v, &elem); + printJsonPathItem(buf, &elem, false, false); + appendStringInfoChar(buf, ')'); + break; + case jpiCurrent: + Assert(!inKey); + appendStringInfoChar(buf, '@'); + break; + case jpiRoot: + Assert(!inKey); + appendStringInfoChar(buf, '$'); + break; + case jpiLast: + appendBinaryStringInfo(buf, "last", 4); + break; + case jpiAnyArray: + appendBinaryStringInfo(buf, "[*]", 3); + break; + case jpiAnyKey: + if (inKey) + appendStringInfoChar(buf, '.'); + appendStringInfoChar(buf, '*'); + break; + case 
jpiIndexArray: + appendStringInfoChar(buf, '['); + for (i = 0; i < v->content.array.nelems; i++) + { + JsonPathItem from; + JsonPathItem to; + bool range = jspGetArraySubscript(v, &from, &to, i); + + if (i) + appendStringInfoChar(buf, ','); + + printJsonPathItem(buf, &from, false, false); + + if (range) + { + appendBinaryStringInfo(buf, " to ", 4); + printJsonPathItem(buf, &to, false, false); + } + } + appendStringInfoChar(buf, ']'); + break; + case jpiAny: + if (inKey) + appendStringInfoChar(buf, '.'); + + if (v->content.anybounds.first == 0 && + v->content.anybounds.last == PG_UINT32_MAX) + appendBinaryStringInfo(buf, "**", 2); + else if (v->content.anybounds.first == v->content.anybounds.last) + { + if (v->content.anybounds.first == PG_UINT32_MAX) + appendStringInfo(buf, "**{last}"); + else + appendStringInfo(buf, "**{%u}", + v->content.anybounds.first); + } + else if (v->content.anybounds.first == PG_UINT32_MAX) + appendStringInfo(buf, "**{last to %u}", + v->content.anybounds.last); + else if (v->content.anybounds.last == PG_UINT32_MAX) + appendStringInfo(buf, "**{%u to last}", + v->content.anybounds.first); + else + appendStringInfo(buf, "**{%u to %u}", + v->content.anybounds.first, + v->content.anybounds.last); + break; + case jpiType: + appendBinaryStringInfo(buf, ".type()", 7); + break; + case jpiSize: + appendBinaryStringInfo(buf, ".size()", 7); + break; + case jpiAbs: + appendBinaryStringInfo(buf, ".abs()", 6); + break; + case jpiFloor: + appendBinaryStringInfo(buf, ".floor()", 8); + break; + case jpiCeiling: + appendBinaryStringInfo(buf, ".ceiling()", 10); + break; + case jpiDouble: + appendBinaryStringInfo(buf, ".double()", 9); + break; + case jpiKeyValue: + appendBinaryStringInfo(buf, ".keyvalue()", 11); + break; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", v->type); + } + + if (jspGetNext(v, &elem)) + printJsonPathItem(buf, &elem, true, true); +} + +const char * +jspOperationName(JsonPathItemType type) +{ + switch (type) + { + case 
jpiAnd: + return "&&"; + case jpiOr: + return "||"; + case jpiEqual: + return "=="; + case jpiNotEqual: + return "!="; + case jpiLess: + return "<"; + case jpiGreater: + return ">"; + case jpiLessOrEqual: + return "<="; + case jpiGreaterOrEqual: + return ">="; + case jpiPlus: + case jpiAdd: + return "+"; + case jpiMinus: + case jpiSub: + return "-"; + case jpiMul: + return "*"; + case jpiDiv: + return "/"; + case jpiMod: + return "%"; + case jpiStartsWith: + return "starts with"; + case jpiLikeRegex: + return "like_regex"; + case jpiType: + return "type"; + case jpiSize: + return "size"; + case jpiKeyValue: + return "keyvalue"; + case jpiDouble: + return "double"; + case jpiAbs: + return "abs"; + case jpiFloor: + return "floor"; + case jpiCeiling: + return "ceiling"; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", type); + return NULL; + } +} + +static int +operationPriority(JsonPathItemType op) +{ + switch (op) + { + case jpiOr: + return 0; + case jpiAnd: + return 1; + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiStartsWith: + return 2; + case jpiAdd: + case jpiSub: + return 3; + case jpiMul: + case jpiDiv: + case jpiMod: + return 4; + case jpiPlus: + case jpiMinus: + return 5; + default: + return 6; + } +} + +/******************* Support functions for JsonPath *************************/ + +/* + * Support macros to read stored values + */ + +#define read_byte(v, b, p) do { \ + (v) = *(uint8*)((b) + (p)); \ + (p) += 1; \ +} while(0) \ + +#define read_int32(v, b, p) do { \ + (v) = *(uint32*)((b) + (p)); \ + (p) += sizeof(int32); \ +} while(0) \ + +#define read_int32_n(v, b, p, n) do { \ + (v) = (void *)((b) + (p)); \ + (p) += sizeof(int32) * (n); \ +} while(0) \ + +/* + * Read root node and fill root node representation + */ +void +jspInit(JsonPathItem *v, JsonPath *js) +{ + Assert((js->header & ~JSONPATH_LAX) == JSONPATH_VERSION); + jspInitByBuffer(v, js->data, 0); 
+} + +/* + * Read node from buffer and fill its representation + */ +void +jspInitByBuffer(JsonPathItem *v, char *base, int32 pos) +{ + v->base = base + pos; + + read_byte(v->type, base, pos); + pos = INTALIGN((uintptr_t) (base + pos)) - (uintptr_t) base; + read_int32(v->nextPos, base, pos); + + switch (v->type) + { + case jpiNull: + case jpiRoot: + case jpiCurrent: + case jpiAnyArray: + case jpiAnyKey: + case jpiType: + case jpiSize: + case jpiAbs: + case jpiFloor: + case jpiCeiling: + case jpiDouble: + case jpiKeyValue: + case jpiLast: + break; + case jpiKey: + case jpiString: + case jpiVariable: + read_int32(v->content.value.datalen, base, pos); + /* follow next */ + case jpiNumeric: + case jpiBool: + v->content.value.data = base + pos; + break; + case jpiAnd: + case jpiOr: + case jpiAdd: + case jpiSub: + case jpiMul: + case jpiDiv: + case jpiMod: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiStartsWith: + read_int32(v->content.args.left, base, pos); + read_int32(v->content.args.right, base, pos); + break; + case jpiLikeRegex: + read_int32(v->content.like_regex.flags, base, pos); + read_int32(v->content.like_regex.expr, base, pos); + read_int32(v->content.like_regex.patternlen, base, pos); + v->content.like_regex.pattern = base + pos; + break; + case jpiNot: + case jpiExists: + case jpiIsUnknown: + case jpiPlus: + case jpiMinus: + case jpiFilter: + read_int32(v->content.arg, base, pos); + break; + case jpiIndexArray: + read_int32(v->content.array.nelems, base, pos); + read_int32_n(v->content.array.elems, base, pos, + v->content.array.nelems * 2); + break; + case jpiAny: + read_int32(v->content.anybounds.first, base, pos); + read_int32(v->content.anybounds.last, base, pos); + break; + default: + elog(ERROR, "unrecognized jsonpath item type: %d", v->type); + } +} + +void +jspGetArg(JsonPathItem *v, JsonPathItem *a) +{ + Assert(v->type == jpiFilter || + v->type == jpiNot || + 
v->type == jpiIsUnknown || + v->type == jpiExists || + v->type == jpiPlus || + v->type == jpiMinus); + + jspInitByBuffer(a, v->base, v->content.arg); +} + +bool +jspGetNext(JsonPathItem *v, JsonPathItem *a) +{ + if (jspHasNext(v)) + { + Assert(v->type == jpiString || + v->type == jpiNumeric || + v->type == jpiBool || + v->type == jpiNull || + v->type == jpiKey || + v->type == jpiAny || + v->type == jpiAnyArray || + v->type == jpiAnyKey || + v->type == jpiIndexArray || + v->type == jpiFilter || + v->type == jpiCurrent || + v->type == jpiExists || + v->type == jpiRoot || + v->type == jpiVariable || + v->type == jpiLast || + v->type == jpiAdd || + v->type == jpiSub || + v->type == jpiMul || + v->type == jpiDiv || + v->type == jpiMod || + v->type == jpiPlus || + v->type == jpiMinus || + v->type == jpiEqual || + v->type == jpiNotEqual || + v->type == jpiGreater || + v->type == jpiGreaterOrEqual || + v->type == jpiLess || + v->type == jpiLessOrEqual || + v->type == jpiAnd || + v->type == jpiOr || + v->type == jpiNot || + v->type == jpiIsUnknown || + v->type == jpiType || + v->type == jpiSize || + v->type == jpiAbs || + v->type == jpiFloor || + v->type == jpiCeiling || + v->type == jpiDouble || + v->type == jpiKeyValue || + v->type == jpiStartsWith); + + if (a) + jspInitByBuffer(a, v->base, v->nextPos); + return true; + } + + return false; +} + +void +jspGetLeftArg(JsonPathItem *v, JsonPathItem *a) +{ + Assert(v->type == jpiAnd || + v->type == jpiOr || + v->type == jpiEqual || + v->type == jpiNotEqual || + v->type == jpiLess || + v->type == jpiGreater || + v->type == jpiLessOrEqual || + v->type == jpiGreaterOrEqual || + v->type == jpiAdd || + v->type == jpiSub || + v->type == jpiMul || + v->type == jpiDiv || + v->type == jpiMod || + v->type == jpiStartsWith); + + jspInitByBuffer(a, v->base, v->content.args.left); +} + +void +jspGetRightArg(JsonPathItem *v, JsonPathItem *a) +{ + Assert(v->type == jpiAnd || + v->type == jpiOr || + v->type == jpiEqual || + v->type == 
jpiNotEqual || + v->type == jpiLess || + v->type == jpiGreater || + v->type == jpiLessOrEqual || + v->type == jpiGreaterOrEqual || + v->type == jpiAdd || + v->type == jpiSub || + v->type == jpiMul || + v->type == jpiDiv || + v->type == jpiMod || + v->type == jpiStartsWith); + + jspInitByBuffer(a, v->base, v->content.args.right); +} + +bool +jspGetBool(JsonPathItem *v) +{ + Assert(v->type == jpiBool); + + return (bool) *v->content.value.data; +} + +Numeric +jspGetNumeric(JsonPathItem *v) +{ + Assert(v->type == jpiNumeric); + + return (Numeric) v->content.value.data; +} + +char * +jspGetString(JsonPathItem *v, int32 *len) +{ + Assert(v->type == jpiKey || + v->type == jpiString || + v->type == jpiVariable); + + if (len) + *len = v->content.value.datalen; + return v->content.value.data; +} + +bool +jspGetArraySubscript(JsonPathItem *v, JsonPathItem *from, JsonPathItem *to, + int i) +{ + Assert(v->type == jpiIndexArray); + + jspInitByBuffer(from, v->base, v->content.array.elems[i].from); + + if (!v->content.array.elems[i].to) + return false; + + jspInitByBuffer(to, v->base, v->content.array.elems[i].to); + + return true; +} diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c new file mode 100644 index 00000000000..0717071188f --- /dev/null +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -0,0 +1,2292 @@ +/*------------------------------------------------------------------------- + * + * jsonpath_exec.c + * Routines for SQL/JSON path execution. + * + * Jsonpath is executed in the global context stored in JsonPathExecContext, + * which is passed to almost every function involved into execution. Entry + * point for jsonpath execution is executeJsonPath() function, which + * initializes execution context including initial JsonPathItem and JsonbValue, + * flags, stack for calculation of @ in filters. 
+ * + * The result of jsonpath query execution is enum JsonPathExecResult and + * if succeeded sequence of JsonbValue, written to JsonValueList *found, which + * is passed through the jsonpath items. When found == NULL, we're inside + * exists-query and we're interested only in whether result is empty. In this + * case execution is stopped once first result item is found, and the only + * execution result is JsonPathExecResult. The values of JsonPathExecResult + * are as follows: + * - jperOk -- result sequence is not empty + * - jperNotFound -- result sequence is empty + * - jperError -- error occurred during execution + * + * Jsonpath is executed recursively (see executeItem()) starting from the + * first path item (which in turn might be, for instance, an arithmetic + * expression evaluated separately). On each step single JsonbValue obtained + * from previous path item is processed. The result of processing is a + * sequence of JsonbValue (probably empty), which is passed to the next path + * item one by one. When there is no next path item, then JsonbValue is added + * to the 'found' list. When found == NULL, then execution functions just + * return jperOk (see executeNextItem()). + * + * Many of jsonpath operations require automatic unwrapping of arrays in lax + * mode. So, if input value is array, then corresponding operation is + * processed not on array itself, but on all of its members one by one. + * executeItemOptUnwrapTarget() function has an 'unwrap' argument, which indicates + * whether unwrapping of array is needed. When unwrap == true, each of array + * members is passed to executeItemOptUnwrapTarget() again but with unwrap == false + * in order to avoid subsequent array unwrapping. + * + * All boolean expressions (predicates) are evaluated by executeBoolItem() + * function, which returns tri-state JsonPathBool. When an error occurs + * during predicate execution, it returns jpbUnknown. 
According to the standard, + * predicates can only appear inside filters. But we support their usage as + * jsonpath expressions. This helps us to implement @@ operator. In this case + * resulting JsonPathBool is transformed into jsonb bool or null. + * + * Arithmetic and boolean expressions are evaluated recursively from expression + * tree top down to the leaves. Therefore, for binary arithmetic expressions + * we calculate operands first. Then we check that results are numeric + * singleton lists, calculate the result and pass it to the next path item. + * + * Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath_exec.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "regex/regex.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/formatting.h" +#include "utils/float.h" +#include "utils/guc.h" +#include "utils/json.h" +#include "utils/jsonpath.h" +#include "utils/date.h" +#include "utils/timestamp.h" +#include "utils/varlena.h" + + +/* Standard error messages for SQL/JSON errors */ +#define ERRMSG_JSON_ARRAY_NOT_FOUND "SQL/JSON array not found" +#define ERRMSG_JSON_OBJECT_NOT_FOUND "SQL/JSON object not found" +#define ERRMSG_JSON_MEMBER_NOT_FOUND "SQL/JSON member not found" +#define ERRMSG_JSON_NUMBER_NOT_FOUND "SQL/JSON number not found" +#define ERRMSG_JSON_SCALAR_REQUIRED "SQL/JSON scalar required" +#define ERRMSG_SINGLETON_JSON_ITEM_REQUIRED "singleton SQL/JSON item required" +#define ERRMSG_NON_NUMERIC_JSON_ITEM "non-numeric SQL/JSON item" +#define ERRMSG_INVALID_JSON_SUBSCRIPT "invalid SQL/JSON subscript" + +/* + * Represents "base object" and its "id" for .keyvalue() evaluation.
+ */ +typedef struct JsonBaseObjectInfo +{ + JsonbContainer *jbc; /* container of the base object (set to NULL by executeJsonPath() at start) */ + int id; /* "id" of the base object (0 at start) */ +} JsonBaseObjectInfo; + +/* + * Context of jsonpath execution. + */ +typedef struct JsonPathExecContext +{ + Jsonb *vars; /* variables to substitute into jsonpath */ + JsonbValue *root; /* for $ evaluation */ + JsonbValue *current; /* for @ evaluation */ + JsonBaseObjectInfo baseObject; /* "base object" for .keyvalue() + * evaluation */ + int lastGeneratedObjectId; /* "id" counter for .keyvalue() + * evaluation */ + int innermostArraySize; /* for LAST array index evaluation; -1 when not inside an array subscript */ + bool laxMode; /* true for "lax" mode, false for "strict" + * mode */ + bool ignoreStructuralErrors; /* with "true" structural errors such + * as absence of required json item or + * unexpected json item type are + * ignored */ + bool throwErrors; /* with "false" all suppressible errors are + * suppressed */ +} JsonPathExecContext; + +/* Context for LIKE_REGEX execution. */ +typedef struct JsonLikeRegexContext +{ + text *regex; /* NOTE(review): presumably the pattern as text -- confirm in executeLikeRegex() */ + int cflags; /* NOTE(review): presumably regex compile flags -- confirm in executeLikeRegex() */ +} JsonLikeRegexContext; + +/* + * Result of jsonpath predicate evaluation (tri-state: jpbUnknown is what + * predicates yield when an error occurs during their evaluation). + */ +typedef enum JsonPathBool +{ + jpbFalse = 0, + jpbTrue = 1, + jpbUnknown = 2 +} JsonPathBool; + +/* + * Result of jsonpath expression evaluation: jperOk -- result sequence is not + * empty, jperNotFound -- result sequence is empty, jperError -- an error + * occurred during execution. + */ +typedef enum JsonPathExecResult +{ + jperOk = 0, + jperNotFound = 1, + jperError = 2 +} JsonPathExecResult; + +/* True when the given JsonPathExecResult signals an error. */ +#define jperIsError(jper) ((jper) == jperError) + +/* + * List of jsonb values with shortcut for single-value list.
+ */ +typedef struct JsonValueList +{ + JsonbValue *singleton; + List *list; +} JsonValueList; + +typedef struct JsonValueListIterator +{ + JsonbValue *value; + ListCell *next; +} JsonValueListIterator; + +/* strict/lax flags is decomposed into four [un]wrap/error flags */ +#define jspStrictAbsenseOfErrors(cxt) (!(cxt)->laxMode) +#define jspAutoUnwrap(cxt) ((cxt)->laxMode) +#define jspAutoWrap(cxt) ((cxt)->laxMode) +#define jspIgnoreStructuralErrors(cxt) ((cxt)->ignoreStructuralErrors) +#define jspThrowErrors(cxt) ((cxt)->throwErrors) + +/* Convenience macro: return or throw error depending on context */ +#define RETURN_ERROR(throw_error) \ +do { \ + if (jspThrowErrors(cxt)) \ + throw_error; \ + else \ + return jperError; \ +} while (0) + +typedef JsonPathBool (*JsonPathPredicateCallback) (JsonPathItem *jsp, + JsonbValue *larg, + JsonbValue *rarg, + void *param); + +static JsonPathExecResult executeJsonPath(JsonPath *path, Jsonb *vars, + Jsonb *json, bool throwErrors, JsonValueList *result); +static JsonPathExecResult executeItem(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, JsonValueList *found); +static JsonPathExecResult executeItemOptUnwrapTarget(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, + JsonValueList *found, bool unwrap); +static JsonPathExecResult executeItemUnwrapTargetArray(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, + JsonValueList *found, bool unwrapElements); +static JsonPathExecResult executeNextItem(JsonPathExecContext *cxt, + JsonPathItem *cur, JsonPathItem *next, + JsonbValue *v, JsonValueList *found, bool copy); +static JsonPathExecResult executeItemOptUnwrapResult( + JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb, + bool unwrap, JsonValueList *found); +static JsonPathExecResult executeItemOptUnwrapResultNoThrow( + JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, bool unwrap, JsonValueList *found); +static JsonPathBool executeBoolItem(JsonPathExecContext *cxt, + 
JsonPathItem *jsp, JsonbValue *jb, bool canHaveNext); +static JsonPathBool executeNestedBoolItem(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb); +static JsonPathExecResult executeAnyItem(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbContainer *jbc, JsonValueList *found, + uint32 level, uint32 first, uint32 last, + bool ignoreStructuralErrors, bool unwrapNext); +static JsonPathBool executePredicate(JsonPathExecContext *cxt, + JsonPathItem *pred, JsonPathItem *larg, JsonPathItem *rarg, + JsonbValue *jb, bool unwrapRightArg, + JsonPathPredicateCallback exec, void *param); +static JsonPathExecResult executeBinaryArithmExpr(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, PGFunction func, + JsonValueList *found); +static JsonPathExecResult executeUnaryArithmExpr(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, PGFunction func, + JsonValueList *found); +static JsonPathBool executeStartsWith(JsonPathItem *jsp, + JsonbValue *whole, JsonbValue *initial, void *param); +static JsonPathBool executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, + JsonbValue *rarg, void *param); +static JsonPathExecResult executeNumericItemMethod(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, bool unwrap, PGFunction func, + JsonValueList *found); +static JsonPathExecResult executeKeyValueMethod(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, JsonValueList *found); +static JsonPathExecResult appendBoolResult(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonValueList *found, JsonPathBool res); +static void getJsonPathItem(JsonPathExecContext *cxt, JsonPathItem *item, + JsonbValue *value); +static void getJsonPathVariable(JsonPathExecContext *cxt, + JsonPathItem *variable, Jsonb *vars, JsonbValue *value); +static int JsonbArraySize(JsonbValue *jb); +static JsonPathBool executeComparison(JsonPathItem *cmp, JsonbValue *lv, + JsonbValue *rv, void *p); +static JsonPathBool compareItems(int32 op, JsonbValue *jb1, 
JsonbValue *jb2); +static int compareNumeric(Numeric a, Numeric b); +static JsonbValue *copyJsonbValue(JsonbValue *src); +static JsonPathExecResult getArrayIndex(JsonPathExecContext *cxt, + JsonPathItem *jsp, JsonbValue *jb, int32 *index); +static JsonBaseObjectInfo setBaseObject(JsonPathExecContext *cxt, + JsonbValue *jbv, int32 id); +static void JsonValueListAppend(JsonValueList *jvl, JsonbValue *jbv); +static int JsonValueListLength(const JsonValueList *jvl); +static bool JsonValueListIsEmpty(JsonValueList *jvl); +static JsonbValue *JsonValueListHead(JsonValueList *jvl); +static List *JsonValueListGetList(JsonValueList *jvl); +static void JsonValueListInitIterator(const JsonValueList *jvl, + JsonValueListIterator *it); +static JsonbValue *JsonValueListNext(const JsonValueList *jvl, + JsonValueListIterator *it); +static int JsonbType(JsonbValue *jb); +static JsonbValue *JsonbInitBinary(JsonbValue *jbv, Jsonb *jb); +static int JsonbType(JsonbValue *jb); +static JsonbValue *getScalar(JsonbValue *scalar, enum jbvType type); +static JsonbValue *wrapItemsInArray(const JsonValueList *items); + +/****************** User interface to JsonPath executor ********************/ + +/* + * jsonb_path_exists + * Returns true if jsonpath returns at least one item for the specified + * jsonb value. This function and jsonb_path_match() are used to + * implement @? and @@ operators, which in turn are intended to have an + * index support. Thus, it's desirable to make it easier to achieve + * consistency between index scan results and sequential scan results. + * So, we throw as less errors as possible. Regarding this function, + * such behavior also matches behavior of JSON_EXISTS() clause of + * SQL/JSON. Regarding jsonb_path_match(), this function doesn't have + * an analogy in SQL/JSON, so we define its behavior on our own. 
+ */ +Datum +jsonb_path_exists(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonPathExecResult res; + Jsonb *vars = NULL; + bool silent = true; + + if (PG_NARGS() == 4) + { + vars = PG_GETARG_JSONB_P(2); + silent = PG_GETARG_BOOL(3); + } + + res = executeJsonPath(jp, vars, jb, !silent, NULL); + + PG_FREE_IF_COPY(jb, 0); + PG_FREE_IF_COPY(jp, 1); + + if (jperIsError(res)) + PG_RETURN_NULL(); + + PG_RETURN_BOOL(res == jperOk); +} + +/* + * jsonb_path_exists_opr + * Implementation of operator "jsonb @? jsonpath" (2-argument version of + * jsonb_path_exists()). + */ +Datum +jsonb_path_exists_opr(PG_FUNCTION_ARGS) +{ + /* just call the other one -- it can handle both cases */ + return jsonb_path_exists(fcinfo); +} + +/* + * jsonb_path_match + * Returns jsonpath predicate result item for the specified jsonb value. + * See jsonb_path_exists() comment for details regarding error handling. + */ +Datum +jsonb_path_match(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonbValue *jbv; + JsonValueList found = {0}; + Jsonb *vars = NULL; + bool silent = true; + + if (PG_NARGS() == 4) + { + vars = PG_GETARG_JSONB_P(2); + silent = PG_GETARG_BOOL(3); + } + + (void) executeJsonPath(jp, vars, jb, !silent, &found); + + if (JsonValueListLength(&found) < 1) + PG_RETURN_NULL(); + + jbv = JsonValueListHead(&found); + + PG_FREE_IF_COPY(jb, 0); + PG_FREE_IF_COPY(jp, 1); + + if (jbv->type != jbvBool) + PG_RETURN_NULL(); + + PG_RETURN_BOOL(jbv->val.boolean); +} + +/* + * jsonb_path_match_opr + * Implementation of operator "jsonb @@ jsonpath" (2-argument version of + * jsonb_path_match()). + */ +Datum +jsonb_path_match_opr(PG_FUNCTION_ARGS) +{ + /* just call the other one -- it can handle both cases */ + return jsonb_path_match(fcinfo); +} + +/* + * jsonb_path_query + * Executes jsonpath for given jsonb document and returns result as + * rowset. 
+ */ +Datum +jsonb_path_query(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + List *found; + JsonbValue *v; + ListCell *c; + + if (SRF_IS_FIRSTCALL()) + { + JsonPath *jp; + Jsonb *jb; + MemoryContext oldcontext; + Jsonb *vars; + bool silent; + JsonValueList found = {0}; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + jb = PG_GETARG_JSONB_P_COPY(0); + jp = PG_GETARG_JSONPATH_P_COPY(1); + vars = PG_GETARG_JSONB_P_COPY(2); + silent = PG_GETARG_BOOL(3); + + (void) executeJsonPath(jp, vars, jb, !silent, &found); + + funcctx->user_fctx = JsonValueListGetList(&found); + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + found = funcctx->user_fctx; + + c = list_head(found); + + if (c == NULL) + SRF_RETURN_DONE(funcctx); + + v = lfirst(c); + funcctx->user_fctx = list_delete_first(found); + + SRF_RETURN_NEXT(funcctx, JsonbPGetDatum(JsonbValueToJsonb(v))); +} + +/* + * jsonb_path_query_array + * Executes jsonpath for given jsonb document and returns result as + * jsonb array. + */ +Datum +jsonb_path_query_array(FunctionCallInfo fcinfo) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonValueList found = {0}; + Jsonb *vars = PG_GETARG_JSONB_P(2); + bool silent = PG_GETARG_BOOL(3); + + (void) executeJsonPath(jp, vars, jb, !silent, &found); + + PG_RETURN_JSONB_P(JsonbValueToJsonb(wrapItemsInArray(&found))); +} + +/* + * jsonb_path_query_first + * Executes jsonpath for given jsonb document and returns first result + * item. If there are no items, NULL returned. 
+ */ +Datum +jsonb_path_query_first(FunctionCallInfo fcinfo) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + JsonPath *jp = PG_GETARG_JSONPATH_P(1); + JsonValueList found = {0}; + Jsonb *vars = PG_GETARG_JSONB_P(2); + bool silent = PG_GETARG_BOOL(3); + + (void) executeJsonPath(jp, vars, jb, !silent, &found); + + if (JsonValueListLength(&found) >= 1) + PG_RETURN_JSONB_P(JsonbValueToJsonb(JsonValueListHead(&found))); + else + PG_RETURN_NULL(); +} + +/********************Execute functions for JsonPath**************************/ + +/* + * Interface to jsonpath executor + * + * 'path' - jsonpath to be executed + * 'vars' - variables to be substituted to jsonpath + * 'json' - target document for jsonpath evaluation + * 'throwErrors' - whether we should throw suppressible errors + * 'result' - list to store result items into + * + * Returns an error happens during processing or NULL on no error. + * + * Note, jsonb and jsonpath values should be avaliable and untoasted during + * work because JsonPathItem, JsonbValue and result item could have pointers + * into input values. If caller needs to just check if document matches + * jsonpath, then it doesn't provide a result arg. In this case executor + * works till first positive result and does not check the rest if possible. + * In other case it tries to find all the satisfied result items. 
+ */ +static JsonPathExecResult +executeJsonPath(JsonPath *path, Jsonb *vars, Jsonb *json, bool throwErrors, + JsonValueList *result) +{ + JsonPathExecContext cxt; + JsonPathExecResult res; + JsonPathItem jsp; + JsonbValue jbv; + + jspInit(&jsp, path); + + if (!JsonbExtractScalar(&json->root, &jbv)) + JsonbInitBinary(&jbv, json); + + if (vars && !JsonContainerIsObject(&vars->root)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("jsonb containing jsonpath variables " + "is not an object"))); + } + + cxt.vars = vars; + cxt.laxMode = (path->header & JSONPATH_LAX) != 0; + cxt.ignoreStructuralErrors = cxt.laxMode; + cxt.root = &jbv; + cxt.current = &jbv; + cxt.baseObject.jbc = NULL; + cxt.baseObject.id = 0; + cxt.lastGeneratedObjectId = vars ? 2 : 1; + cxt.innermostArraySize = -1; + cxt.throwErrors = throwErrors; + + if (jspStrictAbsenseOfErrors(&cxt) && !result) + { + /* + * In strict mode we must get a complete list of values to check that + * there are no errors at all. + */ + JsonValueList vals = {0}; + + res = executeItem(&cxt, &jsp, &jbv, &vals); + + if (jperIsError(res)) + return res; + + return JsonValueListIsEmpty(&vals) ? jperNotFound : jperOk; + } + + res = executeItem(&cxt, &jsp, &jbv, result); + + Assert(!throwErrors || !jperIsError(res)); + + return res; +} + +/* + * Execute jsonpath with automatic unwrapping of current item in lax mode. + */ +static JsonPathExecResult +executeItem(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found) +{ + return executeItemOptUnwrapTarget(cxt, jsp, jb, found, jspAutoUnwrap(cxt)); +} + +/* + * Main jsonpath executor function: walks on jsonpath structure, finds + * relevant parts of jsonb and evaluates expressions over them. + * When 'unwrap' is true current SQL/JSON item is unwrapped if it is an array. 
+ */ +static JsonPathExecResult +executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found, bool unwrap) +{ + JsonPathItem elem; + JsonPathExecResult res = jperNotFound; + JsonBaseObjectInfo baseObject; + + check_stack_depth(); + CHECK_FOR_INTERRUPTS(); + + switch (jsp->type) + { + /* all boolean item types: */ + case jpiAnd: + case jpiOr: + case jpiNot: + case jpiIsUnknown: + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + case jpiExists: + case jpiStartsWith: + case jpiLikeRegex: + { + JsonPathBool st = executeBoolItem(cxt, jsp, jb, true); + + res = appendBoolResult(cxt, jsp, found, st); + break; + } + + case jpiKey: + if (JsonbType(jb) == jbvObject) + { + JsonbValue *v; + JsonbValue key; + + key.type = jbvString; + key.val.string.val = jspGetString(jsp, &key.val.string.len); + + v = findJsonbValueFromContainer(jb->val.binary.data, + JB_FOBJECT, &key); + + if (v != NULL) + { + res = executeNextItem(cxt, jsp, NULL, + v, found, false); + + /* free value if it was not added to found list */ + if (jspHasNext(jsp) || !found) + pfree(v); + } + else if (!jspIgnoreStructuralErrors(cxt)) + { + StringInfoData keybuf; + char *keystr; + + Assert(found); + + if (!jspThrowErrors(cxt)) + return jperError; + + initStringInfo(&keybuf); + + keystr = pnstrdup(key.val.string.val, key.val.string.len); + escape_json(&keybuf, keystr); + + ereport(ERROR, + (errcode(ERRCODE_JSON_MEMBER_NOT_FOUND), \ + errmsg(ERRMSG_JSON_MEMBER_NOT_FOUND), + errdetail("JSON object does not contain key %s", + keybuf.data))); + } + } + else if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + else if (!jspIgnoreStructuralErrors(cxt)) + { + Assert(found); + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_JSON_MEMBER_NOT_FOUND), + errmsg(ERRMSG_JSON_MEMBER_NOT_FOUND), + errdetail("jsonpath member accessor can " + "only be applied 
to an object")))); + } + break; + + case jpiRoot: + jb = cxt->root; + baseObject = setBaseObject(cxt, jb, 0); + res = executeNextItem(cxt, jsp, NULL, jb, found, true); + cxt->baseObject = baseObject; + break; + + case jpiCurrent: + res = executeNextItem(cxt, jsp, NULL, cxt->current, + found, true); + break; + + case jpiAnyArray: + if (JsonbType(jb) == jbvArray) + { + bool hasNext = jspGetNext(jsp, &elem); + + res = executeItemUnwrapTargetArray(cxt, hasNext ? &elem : NULL, + jb, found, jspAutoUnwrap(cxt)); + } + else if (jspAutoWrap(cxt)) + res = executeNextItem(cxt, jsp, NULL, jb, found, true); + else if (!jspIgnoreStructuralErrors(cxt)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_JSON_ARRAY_NOT_FOUND), + errmsg(ERRMSG_JSON_ARRAY_NOT_FOUND), + errdetail("jsonpath wildcard array accessor " + "can only be applied to an array")))); + break; + + case jpiIndexArray: + if (JsonbType(jb) == jbvArray || jspAutoWrap(cxt)) + { + int innermostArraySize = cxt->innermostArraySize; + int i; + int size = JsonbArraySize(jb); + bool singleton = size < 0; + bool hasNext = jspGetNext(jsp, &elem); + + if (singleton) + size = 1; + + cxt->innermostArraySize = size; /* for LAST evaluation */ + + for (i = 0; i < jsp->content.array.nelems; i++) + { + JsonPathItem from; + JsonPathItem to; + int32 index; + int32 index_from; + int32 index_to; + bool range = jspGetArraySubscript(jsp, &from, + &to, i); + + res = getArrayIndex(cxt, &from, jb, &index_from); + + if (jperIsError(res)) + break; + + if (range) + { + res = getArrayIndex(cxt, &to, jb, &index_to); + + if (jperIsError(res)) + break; + } + else + index_to = index_from; + + if (!jspIgnoreStructuralErrors(cxt) && + (index_from < 0 || + index_from > index_to || + index_to >= size)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_INVALID_JSON_SUBSCRIPT), + errmsg(ERRMSG_INVALID_JSON_SUBSCRIPT), + errdetail("jsonpath array subscript is " + "out of bounds")))); + + if (index_from < 0) + index_from = 0; + + if (index_to >= size) + index_to 
= size - 1; + + res = jperNotFound; + + for (index = index_from; index <= index_to; index++) + { + JsonbValue *v; + bool copy; + + if (singleton) + { + v = jb; + copy = true; + } + else + { + v = getIthJsonbValueFromContainer(jb->val.binary.data, + (uint32) index); + + if (v == NULL) + continue; + + copy = false; + } + + if (!hasNext && !found) + return jperOk; + + res = executeNextItem(cxt, jsp, &elem, v, found, + copy); + + if (jperIsError(res)) + break; + + if (res == jperOk && !found) + break; + } + + if (jperIsError(res)) + break; + + if (res == jperOk && !found) + break; + } + + cxt->innermostArraySize = innermostArraySize; + } + else if (!jspIgnoreStructuralErrors(cxt)) + { + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_JSON_ARRAY_NOT_FOUND), + errmsg(ERRMSG_JSON_ARRAY_NOT_FOUND), + errdetail("jsonpath array accessor can " + "only be applied to an array")))); + } + break; + + case jpiLast: + { + JsonbValue tmpjbv; + JsonbValue *lastjbv; + int last; + bool hasNext = jspGetNext(jsp, &elem); + + if (cxt->innermostArraySize < 0) + elog(ERROR, "evaluating jsonpath LAST outside of " + "array subscript"); + + if (!hasNext && !found) + { + res = jperOk; + break; + } + + last = cxt->innermostArraySize - 1; + + lastjbv = hasNext ? &tmpjbv : palloc(sizeof(*lastjbv)); + + lastjbv->type = jbvNumeric; + lastjbv->val.numeric = + DatumGetNumeric(DirectFunctionCall1(int4_numeric, + Int32GetDatum(last))); + + res = executeNextItem(cxt, jsp, &elem, + lastjbv, found, hasNext); + } + break; + + case jpiAnyKey: + if (JsonbType(jb) == jbvObject) + { + bool hasNext = jspGetNext(jsp, &elem); + + if (jb->type != jbvBinary) + elog(ERROR, "invalid jsonb object type: %d", jb->type); + + return executeAnyItem + (cxt, hasNext ? 
&elem : NULL, + jb->val.binary.data, found, 1, 1, 1, + false, jspAutoUnwrap(cxt)); + } + else if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + else if (!jspIgnoreStructuralErrors(cxt)) + { + Assert(found); + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_JSON_OBJECT_NOT_FOUND), + errmsg(ERRMSG_JSON_OBJECT_NOT_FOUND), + errdetail("jsonpath wildcard member accessor " + "can only be applied to an object")))); + } + break; + + case jpiAdd: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_add, found); + + case jpiSub: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_sub, found); + + case jpiMul: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_mul, found); + + case jpiDiv: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_div, found); + + case jpiMod: + return executeBinaryArithmExpr(cxt, jsp, jb, + numeric_mod, found); + + case jpiPlus: + return executeUnaryArithmExpr(cxt, jsp, jb, NULL, found); + + case jpiMinus: + return executeUnaryArithmExpr(cxt, jsp, jb, numeric_uminus, + found); + + case jpiFilter: + { + JsonPathBool st; + + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, + false); + + jspGetArg(jsp, &elem); + st = executeNestedBoolItem(cxt, &elem, jb); + if (st != jpbTrue) + res = jperNotFound; + else + res = executeNextItem(cxt, jsp, NULL, + jb, found, true); + break; + } + + case jpiAny: + { + bool hasNext = jspGetNext(jsp, &elem); + + /* first try without any intermediate steps */ + if (jsp->content.anybounds.first == 0) + { + bool savedIgnoreStructuralErrors; + + savedIgnoreStructuralErrors = cxt->ignoreStructuralErrors; + cxt->ignoreStructuralErrors = true; + res = executeNextItem(cxt, jsp, &elem, + jb, found, true); + cxt->ignoreStructuralErrors = savedIgnoreStructuralErrors; + + if (res == jperOk && !found) + break; + } + + if (jb->type == jbvBinary) + res = executeAnyItem + (cxt, hasNext ? 
&elem : NULL, + jb->val.binary.data, found, + 1, + jsp->content.anybounds.first, + jsp->content.anybounds.last, + true, jspAutoUnwrap(cxt)); + break; + } + + case jpiNull: + case jpiBool: + case jpiNumeric: + case jpiString: + case jpiVariable: + { + JsonbValue vbuf; + JsonbValue *v; + bool hasNext = jspGetNext(jsp, &elem); + + if (!hasNext && !found) + { + res = jperOk; /* skip evaluation */ + break; + } + + v = hasNext ? &vbuf : palloc(sizeof(*v)); + + baseObject = cxt->baseObject; + getJsonPathItem(cxt, jsp, v); + + res = executeNextItem(cxt, jsp, &elem, + v, found, hasNext); + cxt->baseObject = baseObject; + } + break; + + case jpiType: + { + JsonbValue *jbv = palloc(sizeof(*jbv)); + + jbv->type = jbvString; + jbv->val.string.val = pstrdup(JsonbTypeName(jb)); + jbv->val.string.len = strlen(jbv->val.string.val); + + res = executeNextItem(cxt, jsp, NULL, jbv, + found, false); + } + break; + + case jpiSize: + { + int size = JsonbArraySize(jb); + + if (size < 0) + { + if (!jspAutoWrap(cxt)) + { + if (!jspIgnoreStructuralErrors(cxt)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_JSON_ARRAY_NOT_FOUND), + errmsg(ERRMSG_JSON_ARRAY_NOT_FOUND), + errdetail("jsonpath item method .%s() " + "can only be applied to an array", + jspOperationName(jsp->type))))); + break; + } + + size = 1; + } + + jb = palloc(sizeof(*jb)); + + jb->type = jbvNumeric; + jb->val.numeric = + DatumGetNumeric(DirectFunctionCall1(int4_numeric, + Int32GetDatum(size))); + + res = executeNextItem(cxt, jsp, NULL, jb, found, false); + } + break; + + case jpiAbs: + return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_abs, + found); + + case jpiFloor: + return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_floor, + found); + + case jpiCeiling: + return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_ceil, + found); + + case jpiDouble: + { + JsonbValue jbv; + + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, + false); + + if 
(jb->type == jbvNumeric) + { + char *tmp = DatumGetCString(DirectFunctionCall1(numeric_out, + NumericGetDatum(jb->val.numeric))); + + (void) float8in_internal(tmp, + NULL, + "double precision", + tmp); + + res = jperOk; + } + else if (jb->type == jbvString) + { + /* cast string as double */ + double val; + char *tmp = pnstrdup(jb->val.string.val, + jb->val.string.len); + + val = float8in_internal(tmp, + NULL, + "double precision", + tmp); + + if (isinf(val)) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_NON_NUMERIC_JSON_ITEM), + errmsg(ERRMSG_NON_NUMERIC_JSON_ITEM), + errdetail("jsonpath item method .%s() can " + "only be applied to a numeric value", + jspOperationName(jsp->type))))); + + jb = &jbv; + jb->type = jbvNumeric; + jb->val.numeric = DatumGetNumeric(DirectFunctionCall1(float8_numeric, + Float8GetDatum(val))); + res = jperOk; + } + + if (res == jperNotFound) + RETURN_ERROR(ereport(ERROR, + (errcode(ERRCODE_NON_NUMERIC_JSON_ITEM), + errmsg(ERRMSG_NON_NUMERIC_JSON_ITEM), + errdetail("jsonpath item method .%s() " + "can only be applied to a " + "string or numeric value", + jspOperationName(jsp->type))))); + + res = executeNextItem(cxt, jsp, NULL, jb, found, true); + } + break; + + case jpiKeyValue: + if (unwrap && JsonbType(jb) == jbvArray) + return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false); + + return executeKeyValueMethod(cxt, jsp, jb, found); + + default: + elog(ERROR, "unrecognized jsonpath item type: %d", jsp->type); + } + + return res; +} + +/* + * Unwrap current array item and execute jsonpath for each of its elements. 
+ */ +static JsonPathExecResult +executeItemUnwrapTargetArray(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, JsonValueList *found, + bool unwrapElements) +{ + if (jb->type != jbvBinary) + { + Assert(jb->type != jbvArray); + elog(ERROR, "invalid jsonb array value type: %d", jb->type); + } + + return executeAnyItem + (cxt, jsp, jb->val.binary.data, found, 1, 1, 1, + false, unwrapElements); +} + +/* + * Execute next jsonpath item if exists. Otherwise put "v" to the "found" + * list if provided. + */ +static JsonPathExecResult +executeNextItem(JsonPathExecContext *cxt, + JsonPathItem *cur, JsonPathItem *next, + JsonbValue *v, JsonValueList *found, bool copy) +{ + JsonPathItem elem; + bool hasNext; + + if (!cur) + hasNext = next != NULL; + else if (next) + hasNext = jspHasNext(cur); + else + { + next = &elem; + hasNext = jspGetNext(cur, next); + } + + if (hasNext) + return executeItem(cxt, next, v, found); + + if (found) + JsonValueListAppend(found, copy ? copyJsonbValue(v) : v); + + return jperOk; +} + +/* + * Same as executeItem(), but when "unwrap == true" automatically unwraps + * each array item from the resulting sequence in lax mode. + */ +static JsonPathExecResult +executeItemOptUnwrapResult(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, bool unwrap, + JsonValueList *found) +{ + if (unwrap && jspAutoUnwrap(cxt)) + { + JsonValueList seq = {0}; + JsonValueListIterator it; + JsonPathExecResult res = executeItem(cxt, jsp, jb, &seq); + JsonbValue *item; + + if (jperIsError(res)) + return res; + + JsonValueListInitIterator(&seq, &it); + while ((item = JsonValueListNext(&seq, &it))) + { + Assert(item->type != jbvArray); + + if (JsonbType(item) == jbvArray) + executeItemUnwrapTargetArray(cxt, NULL, item, found, false); + else + JsonValueListAppend(found, item); + } + + return jperOk; + } + + return executeItem(cxt, jsp, jb, found); +} + +/* + * Same as executeItemOptUnwrapResult(), but with error suppression. 
+ */ +static JsonPathExecResult +executeItemOptUnwrapResultNoThrow(JsonPathExecContext *cxt, + JsonPathItem *jsp, + JsonbValue *jb, bool unwrap, + JsonValueList *found) +{ + JsonPathExecResult res; + bool throwErrors = cxt->throwErrors; + + cxt->throwErrors = false; + res = executeItemOptUnwrapResult(cxt, jsp, jb, unwrap, found); + cxt->throwErrors = throwErrors; + + return res; +} + +/* Execute boolean-valued jsonpath expression. */ +static JsonPathBool +executeBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp, + JsonbValue *jb, bool canHaveNext) +{ + JsonPathItem larg; + JsonPathItem rarg; + JsonPathBool res; + JsonPathBool res2; + + if (!canHaveNext && jspHasNext(jsp)) + elog(ERROR, "boolean jsonpath item cannot have next item"); + + switch (jsp->type) + { + case jpiAnd: + jspGetLeftArg(jsp, &larg); + res = executeBoolItem(cxt, &larg, jb, false); + + if (res == jpbFalse) + return jpbFalse; + + /* + * SQL/JSON says that we should check second arg in case of + * jperError + */ + + jspGetRightArg(jsp, &rarg); + res2 = executeBoolItem(cxt, &rarg, jb, false); + + return res2 == jpbTrue ? res : res2; + + case jpiOr: + jspGetLeftArg(jsp, &larg); + res = executeBoolItem(cxt, &larg, jb, false); + + if (res == jpbTrue) + return jpbTrue; + + jspGetRightArg(jsp, &rarg); + res2 = executeBoolItem(cxt, &rarg, jb, false); + + return res2 == jpbFalse ? res : res2; + + case jpiNot: + jspGetArg(jsp, &larg); + + res = executeBoolItem(cxt, &larg, jb, false); + + if (res == jpbUnknown) + return jpbUnknown; + + return res == jpbTrue ? jpbFalse : jpbTrue; + + case jpiIsUnknown: + jspGetArg(jsp, &larg); + res = executeBoolItem(cxt, &larg, jb, false); + return res == jpbUnknown ? 
jpbTrue : jpbFalse; + + case jpiEqual: + case jpiNotEqual: + case jpiLess: + case jpiGreater: + case jpiLessOrEqual: + case jpiGreaterOrEqual: + jspGetLeftArg(jsp, &larg); + jspGetRightArg(jsp, &rarg); + return executePredicate(cxt, jsp, &larg, &rarg, jb, true, + executeComparison, NULL); + + case jpiStartsWith: /* 'whole STARTS WITH initial' */ + jspGetLeftArg(jsp, &larg); /* 'whole' */ + jspGetRightArg(jsp, &rarg); /* 'initial' */ + return executePredicate(cxt, jsp, &larg, &rarg, jb, false, + executeStartsWith, NULL); + + case jpiLikeRegex: /* 'expr LIKE_REGEX pattern FLAGS flags' */ + { + /* + * 'expr' is a sequence-returning expression. 'pattern' is a + * regex string literal. SQL/JSON standard requires XQuery + * regexes, but we use Postgres regexes here. 'flags' is a + * string literal converted to integer flags at compile-time. + */ + JsonLikeRegexContext lrcxt = {0}; + + jspInitByBuffer(&larg, jsp->base, + jsp->content.like_regex.expr); + + return executePredicate(cxt, jsp, &larg, NULL, jb, false, + executeLikeRegex, &lrcxt); + } + + case jpiExists: + jspGetArg(jsp, &larg); + + if (jspStrictAbsenseOfErrors(cxt)) + { + /* + * In strict mode we must get a complete list of values to + * check that there are no errors at all. + */ + JsonValueList vals = {0}; + JsonPathExecResult res = + executeItemOptUnwrapResultNoThrow(cxt, &larg, jb, + false, &vals); + + if (jperIsError(res)) + return jpbUnknown; + + return JsonValueListIsEmpty(&vals) ? jpbFalse : jpbTrue; + } + else + { + JsonPathExecResult res = + executeItemOptUnwrapResultNoThrow(cxt, &larg, jb, + false, NULL); + + if (jperIsError(res)) + return jpbUnknown; + + return res == jperOk ? jpbTrue : jpbFalse; + } + + default: + elog(ERROR, "invalid boolean jsonpath item type: %d", jsp->type); + return jpbUnknown; + } +} + +/* + * Execute nested (filters etc.) boolean expression pushing current SQL/JSON + * item onto the stack. 
+ */
+static JsonPathBool
+executeNestedBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
+					  JsonbValue *jb)
+{
+	JsonbValue *prev;
+	JsonPathBool res;
+
+	prev = cxt->current;		/* save the enclosing current item */
+	cxt->current = jb;
+	res = executeBoolItem(cxt, jsp, jb, false);
+	cxt->current = prev;		/* restore it after evaluation */
+
+	return res;
+}
+
+/*
+ * Implementation of several jsonpath nodes:
+ *	- jpiAny (.** accessor),
+ *	- jpiAnyKey (.* accessor),
+ *	- jpiAnyArray ([*] accessor)
+ *
+ * Walks the values/elements of container 'jbc'.  'level' is the depth of this
+ * call (each recursion passes level + 1); nothing is done once level > last.
+ * An item is processed when level >= first, or, when both 'first' and 'last'
+ * are PG_UINT32_MAX, when it is not a nested container (not jbvBinary).
+ * Nested containers are descended into while level < last.
+ *
+ * If 'jsp' is not NULL it is executed for every processed item; otherwise the
+ * item is copied into 'found', or jperOk is returned at once when 'found' is
+ * NULL.  With 'ignoreStructuralErrors' set, cxt->ignoreStructuralErrors is
+ * forced to true around the execution of 'jsp' and restored afterwards.
+ */
+static JsonPathExecResult
+executeAnyItem(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbContainer *jbc,
+			   JsonValueList *found, uint32 level, uint32 first, uint32 last,
+			   bool ignoreStructuralErrors, bool unwrapNext)
+{
+	JsonPathExecResult res = jperNotFound;
+	JsonbIterator *it;
+	int32		r;
+	JsonbValue	v;
+
+	check_stack_depth();
+
+	if (level > last)
+		return res;
+
+	it = JsonbIteratorInit(jbc);
+
+	/*
+	 * Recursively iterate over jsonb objects/arrays
+	 */
+	while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE)
+	{
+		if (r == WJB_KEY)
+		{
+			/* skip the key itself; only its value is examined */
+			r = JsonbIteratorNext(&it, &v, true);
+			Assert(r == WJB_VALUE);
+		}
+
+		if (r == WJB_VALUE || r == WJB_ELEM)
+		{
+
+			if (level >= first ||
+				(first == PG_UINT32_MAX && last == PG_UINT32_MAX &&
+				 v.type != jbvBinary))	/* leaves only requested */
+			{
+				/* check expression */
+				if (jsp)
+				{
+					if (ignoreStructuralErrors)
+					{
+						bool		savedIgnoreStructuralErrors;
+
+						savedIgnoreStructuralErrors = cxt->ignoreStructuralErrors;
+						cxt->ignoreStructuralErrors = true;
+						res = executeItemOptUnwrapTarget(cxt, jsp, &v, found, unwrapNext);
+						cxt->ignoreStructuralErrors = savedIgnoreStructuralErrors;
+					}
+					else
+						res = executeItemOptUnwrapTarget(cxt, jsp, &v, found, unwrapNext);
+
+					if (jperIsError(res))
+						break;
+
+					if (res == jperOk && !found)
+						break;	/* existence is all the caller asked for */
+				}
+				else if (found)
+					JsonValueListAppend(found, copyJsonbValue(&v));
+				else
+					return jperOk;
+			}
+
+			if (level < last && v.type == jbvBinary)
+			{
+				res = executeAnyItem
+					(cxt, jsp, v.val.binary.data, found,
+					 level + 1, first, last, 
ignoreStructuralErrors, unwrapNext);
+
+				if (jperIsError(res))
+					break;
+
+				if (res == jperOk && found == NULL)
+					break;
+			}
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Execute unary or binary predicate.
+ *
+ * Predicates have existence semantics, because their operands are item
+ * sequences.  Pairs of items from the left and right operand's sequences are
+ * checked by calling 'exec' (with 'pred' and 'param' passed through).  TRUE
+ * is returned only if a pair satisfying the condition is found.
+ *
+ * In strict mode, even if the desired pair has already been found, all pairs
+ * still need to be examined to check the absence of errors, and the first
+ * UNKNOWN result ends the evaluation with UNKNOWN.  In lax mode the first
+ * TRUE pair ends the evaluation with TRUE; UNKNOWN (analogous to SQL NULL)
+ * is returned only if no pair was TRUE and at least one was UNKNOWN.
+ *
+ * An error while evaluating either operand also yields UNKNOWN.  'rarg' may
+ * be NULL for unary predicates: 'exec' is then called once per left item
+ * with a NULL right value.
+ */
+static JsonPathBool
+executePredicate(JsonPathExecContext *cxt, JsonPathItem *pred,
+				 JsonPathItem *larg, JsonPathItem *rarg, JsonbValue *jb,
+				 bool unwrapRightArg, JsonPathPredicateCallback exec,
+				 void *param)
+{
+	JsonPathExecResult res;
+	JsonValueListIterator lseqit;
+	JsonValueList lseq = {0};
+	JsonValueList rseq = {0};
+	JsonbValue *lval;
+	bool		error = false;
+	bool		found = false;
+
+	/* Left argument is always auto-unwrapped. */
+	res = executeItemOptUnwrapResultNoThrow(cxt, larg, jb, true, &lseq);
+	if (jperIsError(res))
+		return jpbUnknown;
+
+	if (rarg)
+	{
+		/* Right argument is conditionally auto-unwrapped. */
+		res = executeItemOptUnwrapResultNoThrow(cxt, rarg, jb,
+												unwrapRightArg, &rseq);
+		if (jperIsError(res))
+			return jpbUnknown;
+	}
+
+	JsonValueListInitIterator(&lseq, &lseqit);
+	while ((lval = JsonValueListNext(&lseq, &lseqit)))
+	{
+		JsonValueListIterator rseqit;
+		JsonbValue *rval;
+		bool		first = true;
+
+		if (rarg)
+		{
+			/* restart the right sequence for every left item */
+			JsonValueListInitIterator(&rseq, &rseqit);
+			rval = JsonValueListNext(&rseq, &rseqit);
+		}
+		else
+		{
+			rval = NULL;
+		}
+
+		/* Loop over right arg sequence or do single pass otherwise */
+		while (rarg ? 
(rval != NULL) : first)
+		{
+			JsonPathBool res = exec(pred, lval, rval, param);
+
+			if (res == jpbUnknown)
+			{
+				if (jspStrictAbsenseOfErrors(cxt))
+					return jpbUnknown;
+
+				error = true;	/* remember it, but keep looking for TRUE */
+			}
+			else if (res == jpbTrue)
+			{
+				if (!jspStrictAbsenseOfErrors(cxt))
+					return jpbTrue;
+
+				found = true;	/* keep scanning remaining pairs for errors */
+			}
+
+			first = false;
+			if (rarg)
+				rval = JsonValueListNext(&rseq, &rseqit);
+		}
+	}
+
+	if (found)					/* possible only in strict mode */
+		return jpbTrue;
+
+	if (error)					/* possible only in lax mode */
+		return jpbUnknown;
+
+	return jpbFalse;
+}
+
+/*
+ * Execute binary arithmetic expression on singleton numeric operands.
+ * Array operands are automatically unwrapped in lax mode.
+ *
+ * 'func' is the two-argument function applied to the numeric operands.
+ */
+static JsonPathExecResult
+executeBinaryArithmExpr(JsonPathExecContext *cxt, JsonPathItem *jsp,
+						JsonbValue *jb, PGFunction func,
+						JsonValueList *found)
+{
+	JsonPathExecResult jper;
+	JsonPathItem elem;
+	JsonValueList lseq = {0};
+	JsonValueList rseq = {0};
+	JsonbValue *lval;
+	JsonbValue *rval;
+	Datum		res;
+
+	jspGetLeftArg(jsp, &elem);
+
+	/*
+	 * XXX: By standard only operands of multiplicative expressions are
+	 * unwrapped.  We extend it to other binary arithmetic expressions too. 
+	 */
+	jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &lseq);
+	if (jperIsError(jper))
+		return jper;
+
+	jspGetRightArg(jsp, &elem);
+
+	jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &rseq);
+	if (jperIsError(jper))
+		return jper;
+
+	/* each operand must be exactly one numeric item */
+	if (JsonValueListLength(&lseq) != 1 ||
+		!(lval = getScalar(JsonValueListHead(&lseq), jbvNumeric)))
+		RETURN_ERROR(ereport(ERROR,
+							 (errcode(ERRCODE_SINGLETON_JSON_ITEM_REQUIRED),
+							  errmsg(ERRMSG_SINGLETON_JSON_ITEM_REQUIRED),
+							  errdetail("left operand of binary jsonpath operator %s "
+										"is not a singleton numeric value",
+										jspOperationName(jsp->type)))));
+
+	if (JsonValueListLength(&rseq) != 1 ||
+		!(rval = getScalar(JsonValueListHead(&rseq), jbvNumeric)))
+		RETURN_ERROR(ereport(ERROR,
+							 (errcode(ERRCODE_SINGLETON_JSON_ITEM_REQUIRED),
+							  errmsg(ERRMSG_SINGLETON_JSON_ITEM_REQUIRED),
+							  errdetail("right operand of binary jsonpath operator %s "
+										"is not a singleton numeric value",
+										jspOperationName(jsp->type)))));
+
+	/*
+	 * NOTE(review): 'func' is invoked directly; whether errors it raises
+	 * honor cxt->throwErrors cannot be seen from here -- confirm.
+	 */
+	res = DirectFunctionCall2(func,
+							  NumericGetDatum(lval->val.numeric),
+							  NumericGetDatum(rval->val.numeric));
+
+	if (!jspGetNext(jsp, &elem) && !found)
+		return jperOk;			/* no next item and no result list wanted */
+
+	lval = palloc(sizeof(*lval));	/* fresh item holding the result */
+	lval->type = jbvNumeric;
+	lval->val.numeric = DatumGetNumeric(res);
+
+	return executeNextItem(cxt, jsp, &elem, lval, found, false);
+}
+
+/*
+ * Execute unary arithmetic expression for each numeric item in its operand's
+ * sequence.  Array operand is automatically unwrapped in lax mode. 
+ */
+static JsonPathExecResult
+executeUnaryArithmExpr(JsonPathExecContext *cxt, JsonPathItem *jsp,
+					   JsonbValue *jb, PGFunction func, JsonValueList *found)
+{
+	JsonPathExecResult jper;
+	JsonPathExecResult jper2;
+	JsonPathItem elem;
+	JsonValueList seq = {0};
+	JsonValueListIterator it;
+	JsonbValue *val;
+	bool		hasNext;
+
+	jspGetArg(jsp, &elem);
+	jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &seq);
+
+	if (jperIsError(jper))
+		return jper;
+
+	jper = jperNotFound;		/* result if no item yields jperOk below */
+
+	hasNext = jspGetNext(jsp, &elem);
+
+	JsonValueListInitIterator(&seq, &it);
+	while ((val = JsonValueListNext(&seq, &it)))
+	{
+		if ((val = getScalar(val, jbvNumeric)))
+		{
+			if (!found && !hasNext)
+				return jperOk;	/* one numeric item is enough */
+		}
+		else
+		{
+			if (!found && !hasNext)
+				continue;		/* skip non-numerics processing */
+
+			RETURN_ERROR(ereport(ERROR,
+								 (errcode(ERRCODE_JSON_NUMBER_NOT_FOUND),
+								  errmsg(ERRMSG_JSON_NUMBER_NOT_FOUND),
+								  errdetail("operand of unary jsonpath operator %s "
+											"is not a numeric value",
+											jspOperationName(jsp->type)))));
+		}
+
+		if (func)				/* NULL func: value is passed on unchanged */
+			val->val.numeric =
+				DatumGetNumeric(DirectFunctionCall1(func,
+													NumericGetDatum(val->val.numeric)));
+
+		jper2 = executeNextItem(cxt, jsp, &elem, val, found, false);
+
+		if (jperIsError(jper2))
+			return jper2;
+
+		if (jper2 == jperOk)
+		{
+			if (!found)
+				return jperOk;
+			jper = jperOk;
+		}
+	}
+
+	return jper;
+}
+
+/*
+ * STARTS_WITH predicate callback.
+ *
+ * Check if the 'whole' string starts with the 'initial' string.  Returns
+ * jpbUnknown if either operand is not a string.  'jsp' and 'param' are not
+ * used.
+ */
+static JsonPathBool
+executeStartsWith(JsonPathItem *jsp, JsonbValue *whole, JsonbValue *initial,
+				  void *param)
+{
+	if (!(whole = getScalar(whole, jbvString)))
+		return jpbUnknown;		/* error */
+
+	if (!(initial = getScalar(initial, jbvString)))
+		return jpbUnknown;		/* error */
+
+	/* byte-wise prefix comparison over the length of 'initial' */
+	if (whole->val.string.len >= initial->val.string.len &&
+		!memcmp(whole->val.string.val,
+				initial->val.string.val,
+				initial->val.string.len))
+		return jpbTrue;
+
+	return jpbFalse;
+}
+
+/*
+ * LIKE_REGEX predicate callback. 
+ *
+ * Check if the string matches regex pattern.
+ *
+ * 'param' points to a JsonLikeRegexContext in which the pattern text and the
+ * converted flags are cached on first use.  'rarg' is not used.  Returns
+ * jpbUnknown if 'str' is not a string.
+ */
+static JsonPathBool
+executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
+				 void *param)
+{
+	JsonLikeRegexContext *cxt = param;
+
+	if (!(str = getScalar(str, jbvString)))
+		return jpbUnknown;
+
+	/* Cache regex text and converted flags. */
+	if (!cxt->regex)
+	{
+		uint32		flags = jsp->content.like_regex.flags;
+
+		cxt->regex =
+			cstring_to_text_with_len(jsp->content.like_regex.pattern,
+									 jsp->content.like_regex.patternlen);
+
+		/* Convert regex flags. */
+		cxt->cflags = REG_ADVANCED;
+
+		if (flags & JSP_REGEX_ICASE)
+			cxt->cflags |= REG_ICASE;
+		if (flags & JSP_REGEX_MLINE)
+			cxt->cflags |= REG_NEWLINE;
+		if (flags & JSP_REGEX_SLINE)
+			cxt->cflags &= ~REG_NEWLINE;	/* tested last, so it wins */
+		if (flags & JSP_REGEX_WSPACE)
+			cxt->cflags |= REG_EXPANDED;
+	}
+
+	if (RE_compile_and_execute(cxt->regex, str->val.string.val,
+							   str->val.string.len,
+							   cxt->cflags, DEFAULT_COLLATION_OID, 0, NULL))
+		return jpbTrue;
+
+	return jpbFalse;
+}
+
+/*
+ * Execute numeric item methods (.abs(), .floor(), .ceil()) using the specified
+ * user function 'func'. 
+ */
+static JsonPathExecResult
+executeNumericItemMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
+						 JsonbValue *jb, bool unwrap, PGFunction func,
+						 JsonValueList *found)
+{
+	JsonPathItem next;
+	Datum		datum;
+
+	/* with 'unwrap' set, an array target is handled element by element */
+	if (unwrap && JsonbType(jb) == jbvArray)
+		return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false);
+
+	if (!(jb = getScalar(jb, jbvNumeric)))
+		RETURN_ERROR(ereport(ERROR,
+							 (errcode(ERRCODE_NON_NUMERIC_JSON_ITEM),
+							  errmsg(ERRMSG_NON_NUMERIC_JSON_ITEM),
+							  errdetail("jsonpath item method .%s() can only "
+										"be applied to a numeric value",
+										jspOperationName(jsp->type)))));
+
+	datum = NumericGetDatum(jb->val.numeric);
+	datum = DirectFunctionCall1(func, datum);
+
+	if (!jspGetNext(jsp, &next) && !found)
+		return jperOk;			/* no next item and no result list wanted */
+
+	jb = palloc(sizeof(*jb));	/* fresh item holding the result */
+	jb->type = jbvNumeric;
+	jb->val.numeric = DatumGetNumeric(datum);
+
+	return executeNextItem(cxt, jsp, &next, jb, found, false);
+}
+
+/*
+ * Implementation of .keyvalue() method.
+ *
+ * .keyvalue() method returns a sequence of object's key-value pairs in the
+ * following format: '{ "key": key, "value": value, "id": id }'.
+ *
+ * "id" field is an object identifier which is constructed from the two parts:
+ * base object id and its binary offset in base object's jsonb:
+ * id = 10000000000 * base_object_id + obj_offset_in_base_object
+ *
+ * 10000000000 (10^10) is the first round decimal number greater than 2^32
+ * (maximal offset in jsonb).  Decimal multiplier is used here to improve the
+ * readability of identifiers.
+ *
+ * Base object is usually a root object of the path: context item '$' or path
+ * variable '$var', literals can't produce objects for now.  But if the path
+ * contains generated objects (.keyvalue() itself, for example), then they
+ * become base object for the subsequent .keyvalue().
+ *
+ * Id of '$' is 0. Id of '$var' is its ordinal (positive) number in the list
+ * of variables (see getJsonPathVariable()). 
Ids for generated objects
+ * are assigned using global counter JsonPathExecContext.lastGeneratedObjectId.
+ */
+static JsonPathExecResult
+executeKeyValueMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
+					  JsonbValue *jb, JsonValueList *found)
+{
+	JsonPathExecResult res = jperNotFound;
+	JsonPathItem next;
+	JsonbContainer *jbc;
+	JsonbValue	key;
+	JsonbValue	val;
+	JsonbValue	idval;
+	JsonbValue	keystr;
+	JsonbValue	valstr;
+	JsonbValue	idstr;
+	JsonbIterator *it;
+	JsonbIteratorToken tok;
+	int64		id;
+	bool		hasNext;
+
+	if (JsonbType(jb) != jbvObject || jb->type != jbvBinary)
+		RETURN_ERROR(ereport(ERROR,
+							 (errcode(ERRCODE_JSON_OBJECT_NOT_FOUND),
+							  errmsg(ERRMSG_JSON_OBJECT_NOT_FOUND),
+							  errdetail("jsonpath item method .%s() "
+										"can only be applied to an object",
+										jspOperationName(jsp->type)))));
+
+	jbc = jb->val.binary.data;
+
+	if (!JsonContainerSize(jbc))
+		return jperNotFound;	/* no key-value pairs */
+
+	hasNext = jspGetNext(jsp, &next);
+
+	/* constant key names of the generated { key, value, id } objects */
+	keystr.type = jbvString;
+	keystr.val.string.val = "key";
+	keystr.val.string.len = 3;
+
+	valstr.type = jbvString;
+	valstr.val.string.val = "value";
+	valstr.val.string.len = 5;
+
+	idstr.type = jbvString;
+	idstr.val.string.val = "id";
+	idstr.val.string.len = 2;
+
+	/*
+	 * construct object id from its base object and offset inside that
+	 *
+	 * NOTE(review): jb->type != jbvBinary was already rejected by the check
+	 * above, so the 0 branch looks unreachable -- confirm that RETURN_ERROR
+	 * always leaves the function.
+	 */
+	id = jb->type != jbvBinary ? 
0 :
+		(int64) ((char *) jbc - (char *) cxt->baseObject.jbc);
+	id += (int64) cxt->baseObject.id * INT64CONST(10000000000);
+
+	idval.type = jbvNumeric;
+	idval.val.numeric = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
+															Int64GetDatum(id)));
+
+	it = JsonbIteratorInit(jbc);
+
+	while ((tok = JsonbIteratorNext(&it, &key, true)) != WJB_DONE)
+	{
+		JsonBaseObjectInfo baseObject;
+		JsonbValue	obj;
+		JsonbParseState *ps;
+		JsonbValue *keyval;
+		Jsonb	   *jsonb;
+
+		if (tok != WJB_KEY)
+			continue;
+
+		res = jperOk;
+
+		if (!hasNext && !found)
+			break;				/* one pair is enough when nothing consumes it */
+
+		tok = JsonbIteratorNext(&it, &val, true);
+		Assert(tok == WJB_VALUE);
+
+		/* build { "key": key, "value": val, "id": idval } */
+		ps = NULL;
+		pushJsonbValue(&ps, WJB_BEGIN_OBJECT, NULL);
+
+		pushJsonbValue(&ps, WJB_KEY, &keystr);
+		pushJsonbValue(&ps, WJB_VALUE, &key);
+
+		pushJsonbValue(&ps, WJB_KEY, &valstr);
+		pushJsonbValue(&ps, WJB_VALUE, &val);
+
+		pushJsonbValue(&ps, WJB_KEY, &idstr);
+		pushJsonbValue(&ps, WJB_VALUE, &idval);
+
+		keyval = pushJsonbValue(&ps, WJB_END_OBJECT, NULL);
+
+		jsonb = JsonbValueToJsonb(keyval);
+
+		JsonbInitBinary(&obj, jsonb);
+
+		/* the generated object is the base object while the next item runs */
+		baseObject = setBaseObject(cxt, &obj, cxt->lastGeneratedObjectId++);
+
+		res = executeNextItem(cxt, jsp, &next, &obj, found, true);
+
+		cxt->baseObject = baseObject;	/* restore previous base object */
+
+		if (jperIsError(res))
+			return res;
+
+		if (res == jperOk && !found)
+			break;
+	}
+
+	return res;
+}
+
+/*
+ * Convert boolean execution status 'res' to a boolean JSON item and execute
+ * next jsonpath.
+ *
+ * jpbUnknown is converted to a JSON null; jpbTrue/jpbFalse to a JSON boolean.
+ */
+static JsonPathExecResult
+appendBoolResult(JsonPathExecContext *cxt, JsonPathItem *jsp,
+				 JsonValueList *found, JsonPathBool res)
+{
+	JsonPathItem next;
+	JsonbValue	jbv;
+
+	if (!jspGetNext(jsp, &next) && !found)
+		return jperOk;			/* found singleton boolean value */
+
+	if (res == jpbUnknown)
+	{
+		jbv.type = jbvNull;
+	}
+	else
+	{
+		jbv.type = jbvBool;
+		jbv.val.boolean = res == jpbTrue;
+	}
+
+	return executeNextItem(cxt, jsp, &next, &jbv, found, true);
+}
+
+/*
+ * Convert jsonpath's scalar or variable node to actual jsonb value. 
+ *
+ * The converted value is stored into '*value'; nothing is returned.  For a
+ * variable node the lookup is delegated to getJsonPathVariable() using
+ * cxt->vars.  Any other item type is reported with elog(ERROR).
+ */
+static void
+getJsonPathItem(JsonPathExecContext *cxt, JsonPathItem *item,
+				JsonbValue *value)
+{
+	switch (item->type)
+	{
+		case jpiNull:
+			value->type = jbvNull;
+			break;
+		case jpiBool:
+			value->type = jbvBool;
+			value->val.boolean = jspGetBool(item);
+			break;
+		case jpiNumeric:
+			value->type = jbvNumeric;
+			value->val.numeric = jspGetNumeric(item);
+			break;
+		case jpiString:
+			value->type = jbvString;
+			value->val.string.val = jspGetString(item,
+												 &value->val.string.len);
+			break;
+		case jpiVariable:
+			getJsonPathVariable(cxt, item, cxt->vars, value);
+			return;
+		default:
+			elog(ERROR, "unexpected jsonpath item type");
+	}
+}
+
+/*
+ * Get the value of variable passed to jsonpath executor
+ *
+ * The variable is looked up by name among the keys of the 'vars' object and
+ * its value is copied into '*value'.  If 'vars' is NULL the result is a JSON
+ * null; if 'vars' is given but has no such key, an error is raised.  On
+ * success 'vars' becomes the current base object.
+ */
+static void
+getJsonPathVariable(JsonPathExecContext *cxt, JsonPathItem *variable,
+					Jsonb *vars, JsonbValue *value)
+{
+	char	   *varName;
+	int			varNameLength;
+	JsonbValue	tmp;
+	JsonbValue *v;
+
+	if (!vars)
+	{
+		value->type = jbvNull;
+		return;
+	}
+
+	Assert(variable->type == jpiVariable);
+	varName = jspGetString(variable, &varNameLength);
+	tmp.type = jbvString;		/* 'tmp' first serves as the lookup key */
+	tmp.val.string.val = varName;
+	tmp.val.string.len = varNameLength;
+
+	v = findJsonbValueFromContainer(&vars->root, JB_FOBJECT, &tmp);
+
+	if (v)
+	{
+		*value = *v;
+		pfree(v);
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("cannot find jsonpath variable '%s'",
+						pnstrdup(varName, varNameLength))));
+	}
+
+	JsonbInitBinary(&tmp, vars);	/* 'tmp' is reused to wrap 'vars' */
+	setBaseObject(cxt, &tmp, 1);	/* every variable gets base object id 1 */
+}
+
+/**************** Support functions for JsonPath execution *****************/
+
+/*
+ * Returns the size of an array item, or -1 if item is not an array. 
+ */
+static int
+JsonbArraySize(JsonbValue *jb)
+{
+	Assert(jb->type != jbvArray);
+
+	if (jb->type == jbvBinary)
+	{
+		JsonbContainer *jbc = jb->val.binary.data;
+
+		/* a scalar wrapped in an array container does not count as array */
+		if (JsonContainerIsArray(jbc) && !JsonContainerIsScalar(jbc))
+			return JsonContainerSize(jbc);
+	}
+
+	return -1;
+}
+
+/* Comparison predicate callback: compares 'lv' and 'rv' using cmp->type. */
+static JsonPathBool
+executeComparison(JsonPathItem *cmp, JsonbValue *lv, JsonbValue *rv, void *p)
+{
+	return compareItems(cmp->type, lv, rv);
+}
+
+/*
+ * Compare two SQL/JSON items using comparison operation 'op'.
+ *
+ * Returns jpbUnknown for non-null items of different types and for
+ * non-scalar items; otherwise jpbTrue or jpbFalse.
+ */
+static JsonPathBool
+compareItems(int32 op, JsonbValue *jb1, JsonbValue *jb2)
+{
+	int			cmp;
+	bool		res;
+
+	if (jb1->type != jb2->type)
+	{
+		if (jb1->type == jbvNull || jb2->type == jbvNull)
+
+			/*
+			 * Equality and order comparison of nulls to non-nulls returns
+			 * always false, but inequality comparison returns true.
+			 */
+			return op == jpiNotEqual ? jpbTrue : jpbFalse;
+
+		/* Non-null items of different types are not comparable. */
+		return jpbUnknown;
+	}
+
+	switch (jb1->type)
+	{
+		case jbvNull:
+			cmp = 0;
+			break;
+		case jbvBool:
+			cmp = jb1->val.boolean == jb2->val.boolean ? 0 :
+				jb1->val.boolean ? 1 : -1;
+			break;
+		case jbvNumeric:
+			cmp = compareNumeric(jb1->val.numeric, jb2->val.numeric);
+			break;
+		case jbvString:
+			if (op == jpiEqual)	/* equality is decided by bytes alone */
+				return jb1->val.string.len != jb2->val.string.len ||
+					memcmp(jb1->val.string.val,
+						   jb2->val.string.val,
+						   jb1->val.string.len) ? 
jpbFalse : jpbTrue;
+
+			/* every other operation uses the default collation's order */
+			cmp = varstr_cmp(jb1->val.string.val, jb1->val.string.len,
+							 jb2->val.string.val, jb2->val.string.len,
+							 DEFAULT_COLLATION_OID);
+			break;
+
+		case jbvBinary:
+		case jbvArray:
+		case jbvObject:
+			return jpbUnknown;	/* non-scalars are not comparable */
+
+		default:
+			elog(ERROR, "invalid jsonb value type %d", jb1->type);
+	}
+
+	/* translate the three-way result 'cmp' according to 'op' */
+	switch (op)
+	{
+		case jpiEqual:
+			res = (cmp == 0);
+			break;
+		case jpiNotEqual:
+			res = (cmp != 0);
+			break;
+		case jpiLess:
+			res = (cmp < 0);
+			break;
+		case jpiGreater:
+			res = (cmp > 0);
+			break;
+		case jpiLessOrEqual:
+			res = (cmp <= 0);
+			break;
+		case jpiGreaterOrEqual:
+			res = (cmp >= 0);
+			break;
+		default:
+			elog(ERROR, "unrecognized jsonpath operation: %d", op);
+			return jpbUnknown;
+	}
+
+	return res ? jpbTrue : jpbFalse;
+}
+
+/* Compare two numerics by calling numeric_cmp; returns its int32 result */
+static int
+compareNumeric(Numeric a, Numeric b)
+{
+	return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
+											 PointerGetDatum(a),
+											 PointerGetDatum(b)));
+}
+
+static JsonbValue *
+copyJsonbValue(JsonbValue *src)
+{
+	JsonbValue *dst = palloc(sizeof(*dst));	/* palloc'd struct copy of *src */
+
+	*dst = *src;
+
+	return dst;
+}
+
+/*
+ * Execute array subscript expression and convert resulting numeric item to
+ * the integer type with truncation. 
+ */
+static JsonPathExecResult
+getArrayIndex(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb,
+			  int32 *index)
+{
+	JsonbValue *jbv;
+	JsonValueList found = {0};
+	JsonPathExecResult res = executeItem(cxt, jsp, jb, &found);
+	Datum		numeric_index;
+
+	if (jperIsError(res))
+		return res;
+
+	/* the subscript must evaluate to exactly one numeric item */
+	if (JsonValueListLength(&found) != 1 ||
+		!(jbv = getScalar(JsonValueListHead(&found), jbvNumeric)))
+		RETURN_ERROR(ereport(ERROR,
+							 (errcode(ERRCODE_INVALID_JSON_SUBSCRIPT),
+							  errmsg(ERRMSG_INVALID_JSON_SUBSCRIPT),
+							  errdetail("jsonpath array subscript is not a "
+										"singleton numeric value"))));
+
+	numeric_index = DirectFunctionCall2(numeric_trunc,
+										NumericGetDatum(jbv->val.numeric),
+										Int32GetDatum(0));
+
+	/*
+	 * NOTE(review): numeric_int4 is called directly; how it behaves for
+	 * values outside the int32 range is not visible here -- confirm.
+	 */
+	*index = DatumGetInt32(DirectFunctionCall1(numeric_int4, numeric_index));
+
+	return jperOk;
+}
+
+/*
+ * Save base object and its id needed for the execution of .keyvalue().
+ *
+ * Returns the previous base object info so that the caller can restore it.
+ */
+static JsonBaseObjectInfo
+setBaseObject(JsonPathExecContext *cxt, JsonbValue *jbv, int32 id)
+{
+	JsonBaseObjectInfo baseObject = cxt->baseObject;
+
+	cxt->baseObject.jbc = jbv->type != jbvBinary ? NULL :
+		(JsonbContainer *) jbv->val.binary.data;
+	cxt->baseObject.id = id;
+
+	return baseObject;
+}
+
+static void
+JsonValueListAppend(JsonValueList *jvl, JsonbValue *jbv)
+{
+	if (jvl->singleton)			/* second item: switch to a two-item List */
+	{
+		jvl->list = list_make2(jvl->singleton, jbv);
+		jvl->singleton = NULL;
+	}
+	else if (!jvl->list)		/* first item is kept in 'singleton' */
+		jvl->singleton = jbv;
+	else
+		jvl->list = lappend(jvl->list, jbv);
+}
+
+static int
+JsonValueListLength(const JsonValueList *jvl)
+{
+	return jvl->singleton ? 1 : list_length(jvl->list);
+}
+
+static bool
+JsonValueListIsEmpty(JsonValueList *jvl)
+{
+	return !jvl->singleton && list_length(jvl->list) <= 0;
+}
+
+static JsonbValue *
+JsonValueListHead(JsonValueList *jvl)
+{
+	return jvl->singleton ? 
jvl->singleton : linitial(jvl->list);
+}
+
+static List *
+JsonValueListGetList(JsonValueList *jvl)
+{
+	if (jvl->singleton)			/* wrap the single item in a new List */
+		return list_make1(jvl->singleton);
+
+	return jvl->list;
+}
+
+static void
+JsonValueListInitIterator(const JsonValueList *jvl, JsonValueListIterator *it)
+{
+	/* it->value is the item the next JsonValueListNext() call will return */
+	if (jvl->singleton)
+	{
+		it->value = jvl->singleton;
+		it->next = NULL;
+	}
+	else if (list_head(jvl->list) != NULL)
+	{
+		it->value = (JsonbValue *) linitial(jvl->list);
+		it->next = lnext(list_head(jvl->list));
+	}
+	else
+	{
+		it->value = NULL;
+		it->next = NULL;
+	}
+}
+
+/*
+ * Get the next item from the sequence advancing iterator.
+ *
+ * Returns NULL once the sequence is exhausted.  'jvl' itself is not read.
+ */
+static JsonbValue *
+JsonValueListNext(const JsonValueList *jvl, JsonValueListIterator *it)
+{
+	JsonbValue *result = it->value;
+
+	if (it->next)
+	{
+		it->value = lfirst(it->next);
+		it->next = lnext(it->next);
+	}
+	else
+	{
+		it->value = NULL;
+	}
+
+	return result;
+}
+
+/*
+ * Initialize a binary JsonbValue with the given jsonb container.
+ *
+ * Returns 'jbv' for convenience.
+ */
+static JsonbValue *
+JsonbInitBinary(JsonbValue *jbv, Jsonb *jb)
+{
+	jbv->type = jbvBinary;
+	jbv->val.binary.data = &jb->root;
+	jbv->val.binary.len = VARSIZE_ANY_EXHDR(jb);
+
+	return jbv;
+}
+
+/*
+ * Returns the jbv* type of a JsonbValue.  Note, it never returns jbvBinary
+ * as is: a binary value is reported as jbvObject or jbvArray depending on
+ * its container.
+ */
+static int
+JsonbType(JsonbValue *jb)
+{
+	int			type = jb->type;
+
+	if (jb->type == jbvBinary)
+	{
+		JsonbContainer *jbc = (void *) jb->val.binary.data;
+
+		/* Scalars should be always extracted during jsonpath execution. */
+		Assert(!JsonContainerIsScalar(jbc));
+
+		if (JsonContainerIsObject(jbc))
+			type = jbvObject;
+		else if (JsonContainerIsArray(jbc))
+			type = jbvArray;
+		else
+			elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header);
+	}
+
+	return type;
+}
+
+/* Get scalar of given type or NULL on type mismatch */
+static JsonbValue *
+getScalar(JsonbValue *scalar, enum jbvType type)
+{
+	/* Scalars should be always extracted during jsonpath execution. 
*/
+	Assert(scalar->type != jbvBinary ||
+		   !JsonContainerIsScalar(scalar->val.binary.data));
+
+	return scalar->type == type ? scalar : NULL;
+}
+
+/*
+ * Construct a JSON array from the item list
+ *
+ * Every item of 'items' is pushed as an array element, in list order; the
+ * value returned by the closing WJB_END_ARRAY push is the result.
+ */
+static JsonbValue *
+wrapItemsInArray(const JsonValueList *items)
+{
+	JsonbParseState *ps = NULL;
+	JsonValueListIterator it;
+	JsonbValue *jbv;
+
+	pushJsonbValue(&ps, WJB_BEGIN_ARRAY, NULL);
+
+	JsonValueListInitIterator(items, &it);
+	while ((jbv = JsonValueListNext(items, &it)))
+		pushJsonbValue(&ps, WJB_ELEM, jbv);
+
+	return pushJsonbValue(&ps, WJB_END_ARRAY, NULL);
+}
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
new file mode 100644
index 00000000000..183861f780f
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -0,0 +1,480 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_gram.y
+ *	 Grammar definitions for jsonpath datatype
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	src/backend/utils/adt/jsonpath_gram.y
+ *
+ *-------------------------------------------------------------------------
+ */
+
+%{
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "regex/regex.h"
+#include "utils/builtins.h"
+#include "utils/jsonpath.h"
+#include "utils/jsonpath_scanner.h"
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc.  This prevents
+ * memory leaks if we error out during parsing.  Note this only works with
+ * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc. 
+ */
+#define YYMALLOC palloc
+#define YYFREE   pfree
+
+static JsonPathParseItem*
+makeItemType(int type)
+{
+	JsonPathParseItem* v = palloc(sizeof(*v));	/* only type/next are set here */
+
+	CHECK_FOR_INTERRUPTS();
+
+	v->type = type;
+	v->next = NULL;
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemString(string *s)
+{
+	JsonPathParseItem *v;
+
+	if (s == NULL)				/* NULL string produces a jpiNull item */
+	{
+		v = makeItemType(jpiNull);
+	}
+	else
+	{
+		v = makeItemType(jpiString);
+		v->value.string.val = s->val;	/* pointer is shared, not copied */
+		v->value.string.len = s->len;
+	}
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemVariable(string *s)
+{
+	JsonPathParseItem *v;
+
+	v = makeItemType(jpiVariable);
+	v->value.string.val = s->val;	/* variable name */
+	v->value.string.len = s->len;
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemKey(string *s)
+{
+	JsonPathParseItem *v;
+
+	v = makeItemString(s);		/* same payload as a string item ... */
+	v->type = jpiKey;			/* ... but tagged as a key accessor */
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemNumeric(string *s)
+{
+	JsonPathParseItem *v;
+
+	v = makeItemType(jpiNumeric);
+	v->value.numeric =
+		DatumGetNumeric(DirectFunctionCall3(numeric_in,
+											CStringGetDatum(s->val), 0, -1));
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemBool(bool val) {
+	JsonPathParseItem *v = makeItemType(jpiBool);
+
+	v->value.boolean = val;
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra)
+{
+	JsonPathParseItem  *v = makeItemType(type);
+
+	v->value.args.left = la;
+	v->value.args.right = ra;
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemUnary(int type, JsonPathParseItem* a)
+{
+	JsonPathParseItem  *v;
+
+	/* unary plus on a lone numeric literal: return the literal itself */
+	if (type == jpiPlus && a->type == jpiNumeric && !a->next)
+		return a;
+
+	/* unary minus on a lone numeric literal: fold into a negated literal */
+	if (type == jpiMinus && a->type == jpiNumeric && !a->next)
+	{
+		v = makeItemType(jpiNumeric);
+		v->value.numeric =
+			DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
+												NumericGetDatum(a->value.numeric)));
+		return v;
+	}
+
+	v = makeItemType(type);
+
+	v->value.arg = a;
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeItemList(List *list)
+{
+	
JsonPathParseItem  *head, *end;
+	ListCell		   *cell = list_head(list);
+
+	head = end = (JsonPathParseItem *) lfirst(cell);
+
+	if (!lnext(cell))
+		return head;			/* single-element list: nothing to chain */
+
+	/* append items to the end of already existing list */
+	while (end->next)
+		end = end->next;
+
+	/* link the remaining list elements through their 'next' pointers */
+	for_each_cell(cell, lnext(cell))
+	{
+		JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
+
+		end->next = c;
+		end = c;
+	}
+
+	return head;
+}
+
+static JsonPathParseItem*
+makeIndexArray(List *list)
+{
+	JsonPathParseItem	*v = makeItemType(jpiIndexArray);
+	ListCell			*cell;
+	int					i = 0;
+
+	Assert(list_length(list) > 0);
+	v->value.array.nelems = list_length(list);
+
+	v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
+								  v->value.array.nelems);
+
+	/* each list element is a jpiSubscript item holding a from/to pair */
+	foreach(cell, list)
+	{
+		JsonPathParseItem *jpi = lfirst(cell);
+
+		Assert(jpi->type == jpiSubscript);
+
+		v->value.array.elems[i].from = jpi->value.args.left;
+		v->value.array.elems[i++].to = jpi->value.args.right;
+	}
+
+	return v;
+}
+
+static JsonPathParseItem*
+makeAny(int first, int last)
+{
+	JsonPathParseItem *v = makeItemType(jpiAny);
+
+	/* a negative bound is stored as PG_UINT32_MAX */
+	v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
+	v->value.anybounds.last = (last >= 0) ? 
last : PG_UINT32_MAX;
+
+	return v;
+}
+
+/*
+ * Build a jpiLikeRegex item for 'expr LIKE_REGEX pattern [FLAG flags]'.
+ *
+ * 'flags' may be NULL when no FLAG clause was given.  Each flag character is
+ * converted into the JSP_REGEX_* bits stored in the item; an unrecognized
+ * character is reported through yyerror().  The flags 's' and 'm' override
+ * each other, the last one given wins.
+ *
+ * The pattern is also compiled once here so that an invalid regex is caught
+ * at parse time.  The compile flags used for that check must be the ones
+ * executeLikeRegex() derives from the JSP_REGEX_* bits at run time, i.e.
+ * 'm' sets REG_NEWLINE and 's' clears it.
+ */
+static JsonPathParseItem *
+makeItemLikeRegex(JsonPathParseItem *expr, string *pattern, string *flags)
+{
+	JsonPathParseItem *v = makeItemType(jpiLikeRegex);
+	int			i;
+	int			cflags = REG_ADVANCED;
+
+	v->value.like_regex.expr = expr;
+	v->value.like_regex.pattern = pattern->val;
+	v->value.like_regex.patternlen = pattern->len;
+	v->value.like_regex.flags = 0;
+
+	for (i = 0; flags && i < flags->len; i++)
+	{
+		switch (flags->val[i])
+		{
+			case 'i':
+				v->value.like_regex.flags |= JSP_REGEX_ICASE;
+				cflags |= REG_ICASE;
+				break;
+			case 's':
+				v->value.like_regex.flags &= ~JSP_REGEX_MLINE;
+				v->value.like_regex.flags |= JSP_REGEX_SLINE;
+				/* same as executeLikeRegex(): SLINE clears REG_NEWLINE */
+				cflags &= ~REG_NEWLINE;
+				break;
+			case 'm':
+				v->value.like_regex.flags &= ~JSP_REGEX_SLINE;
+				v->value.like_regex.flags |= JSP_REGEX_MLINE;
+				/* same as executeLikeRegex(): MLINE sets REG_NEWLINE */
+				cflags |= REG_NEWLINE;
+				break;
+			case 'x':
+				v->value.like_regex.flags |= JSP_REGEX_WSPACE;
+				cflags |= REG_EXPANDED;
+				break;
+			default:
+				yyerror(NULL, "unrecognized flag of LIKE_REGEX predicate");
+				break;
+		}
+	}
+
+	/* check regex validity */
+	(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
+														 pattern->len),
+								cflags, DEFAULT_COLLATION_OID);
+
+	return v;
+}
+
+%}
+
+/* BISON Declarations */
+%pure-parser
+%expect 0
+%name-prefix="jsonpath_yy"
+%error-verbose
+%parse-param {JsonPathParseResult **result}
+
+%union {
+	string				str;
+	List			   *elems;	/* list of JsonPathParseItem */
+	List			   *indexs;	/* list of JsonPathParseItem (jpiSubscript) */
+	JsonPathParseItem  *value;
+	JsonPathParseResult *result;
+	JsonPathItemType	optype;
+	bool				boolean;
+	int					integer;
+}
+
+%token	<str>		TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
+%token	<str>		IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
+%token	<str>		OR_P AND_P NOT_P
+%token	<str>		LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
+%token	<str>		ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
+%token	<str>		ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
+
+%type	<result>	
result
+
+%type	<value>		scalar_value path_primary expr array_accessor
+					any_path accessor_op key predicate delimited_predicate
+					index_elem starts_with_initial expr_or_predicate
+
+%type	<elems>		accessor_expr
+
+%type	<indexs>	index_list
+
+%type	<optype>	comp_op method
+
+%type	<boolean>	mode
+
+%type	<str>		key_name
+
+%type	<integer>	any_level
+
+%left	OR_P
+%left	AND_P
+%right	NOT_P
+%left	'+' '-'
+%left	'*' '/' '%'
+%left	UMINUS
+%nonassoc '(' ')'
+
+/* Grammar follows */
+%%
+
+result:
+	mode expr_or_predicate			{
+										*result = palloc(sizeof(JsonPathParseResult));
+										(*result)->expr = $2;
+										(*result)->lax = $1;
+									}
+	| /* EMPTY: no parse result is produced */	{ *result = NULL; }
+	;
+
+expr_or_predicate:
+	expr							{ $$ = $1; }
+	| predicate						{ $$ = $1; }
+	;
+
+mode:
+	STRICT_P						{ $$ = false; }
+	| LAX_P							{ $$ = true; }
+	| /* EMPTY: lax is the default */	{ $$ = true; }
+	;
+
+scalar_value:
+	STRING_P						{ $$ = makeItemString(&$1); }
+	| NULL_P						{ $$ = makeItemString(NULL); }
+	| TRUE_P						{ $$ = makeItemBool(true); }
+	| FALSE_P						{ $$ = makeItemBool(false); }
+	| NUMERIC_P						{ $$ = makeItemNumeric(&$1); }
+	| INT_P							{ $$ = makeItemNumeric(&$1); }
+	| VARIABLE_P 					{ $$ = makeItemVariable(&$1); }
+	;
+
+comp_op:
+	EQUAL_P							{ $$ = jpiEqual; }
+	| NOTEQUAL_P					{ $$ = jpiNotEqual; }
+	| LESS_P						{ $$ = jpiLess; }
+	| GREATER_P						{ $$ = jpiGreater; }
+	| LESSEQUAL_P					{ $$ = jpiLessOrEqual; }
+	| GREATEREQUAL_P				{ $$ = jpiGreaterOrEqual; }
+	;
+
+delimited_predicate:
+	'(' predicate ')'						{ $$ = $2; }
+	| EXISTS_P '(' expr ')'			{ $$ = makeItemUnary(jpiExists, $3); }
+	;
+
+predicate:
+	delimited_predicate				{ $$ = $1; }
+	| expr comp_op expr				{ $$ = makeItemBinary($2, $1, $3); }
+	| predicate AND_P predicate		{ $$ = makeItemBinary(jpiAnd, $1, $3); }
+	| predicate OR_P predicate		{ $$ = makeItemBinary(jpiOr, $1, $3); }
+	| NOT_P delimited_predicate 	{ $$ = makeItemUnary(jpiNot, $2); }
+	| '(' predicate ')' IS_P UNKNOWN_P	{ $$ = makeItemUnary(jpiIsUnknown, $2); }
+	| expr STARTS_P WITH_P starts_with_initial
+		{ $$ = makeItemBinary(jpiStartsWith, $1, $4); } 
+ | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); } + | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P + { $$ = makeItemLikeRegex($1, &$3, &$5); } + ; + +starts_with_initial: + STRING_P { $$ = makeItemString(&$1); } + | VARIABLE_P { $$ = makeItemVariable(&$1); } + ; + +path_primary: + scalar_value { $$ = $1; } + | '$' { $$ = makeItemType(jpiRoot); } + | '@' { $$ = makeItemType(jpiCurrent); } + | LAST_P { $$ = makeItemType(jpiLast); } + ; + +accessor_expr: + path_primary { $$ = list_make1($1); } + | '(' expr ')' accessor_op { $$ = list_make2($2, $4); } + | '(' predicate ')' accessor_op { $$ = list_make2($2, $4); } + | accessor_expr accessor_op { $$ = lappend($1, $2); } + ; + +expr: + accessor_expr { $$ = makeItemList($1); } + | '(' expr ')' { $$ = $2; } + | '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); } + | '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); } + | expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); } + | expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); } + | expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); } + | expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); } + | expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); } + ; + +index_elem: + expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); } + | expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); } + ; + +index_list: + index_elem { $$ = list_make1($1); } + | index_list ',' index_elem { $$ = lappend($1, $3); } + ; + +array_accessor: + '[' '*' ']' { $$ = makeItemType(jpiAnyArray); } + | '[' index_list ']' { $$ = makeIndexArray($2); } + ; + +any_level: + INT_P { $$ = pg_atoi($1.val, 4, 0); } + | LAST_P { $$ = -1; } + ; + +any_path: + ANY_P { $$ = makeAny(0, -1); } + | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); } + | ANY_P '{' any_level TO_P any_level '}' { $$ = makeAny($3, $5); } + ; + +accessor_op: + '.' key { $$ = $2; } + | '.' '*' { $$ = makeItemType(jpiAnyKey); } + | array_accessor { $$ = $1; } + | '.' any_path { $$ = $2; } + | '.' 
method '(' ')' { $$ = makeItemType($2); } + | '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); } + ; + +key: + key_name { $$ = makeItemKey(&$1); } + ; + +key_name: + IDENT_P + | STRING_P + | TO_P + | NULL_P + | TRUE_P + | FALSE_P + | IS_P + | UNKNOWN_P + | EXISTS_P + | STRICT_P + | LAX_P + | ABS_P + | SIZE_P + | TYPE_P + | FLOOR_P + | DOUBLE_P + | CEILING_P + | KEYVALUE_P + | LAST_P + | STARTS_P + | WITH_P + | LIKE_REGEX_P + | FLAG_P + ; + +method: + ABS_P { $$ = jpiAbs; } + | SIZE_P { $$ = jpiSize; } + | TYPE_P { $$ = jpiType; } + | FLOOR_P { $$ = jpiFloor; } + | DOUBLE_P { $$ = jpiDouble; } + | CEILING_P { $$ = jpiCeiling; } + | KEYVALUE_P { $$ = jpiKeyValue; } + ; +%% + diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l new file mode 100644 index 00000000000..110ea2160d9 --- /dev/null +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -0,0 +1,638 @@ +/*------------------------------------------------------------------------- + * + * jsonpath_scan.l + * Lexical parser for jsonpath datatype + * + * Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath_scan.l + * + *------------------------------------------------------------------------- + */ + +%{ +#include "postgres.h" + +#include "mb/pg_wchar.h" +#include "nodes/pg_list.h" +#include "utils/jsonpath_scanner.h" + +static string scanstring; + +/* No reason to constrain amount of data slurped */ +/* #define YY_READ_BUF_SIZE 16777216 */ + +/* Handles to the buffer that the lexer uses internally */ +static YY_BUFFER_STATE scanbufhandle; +static char *scanbuf; +static int scanbuflen; + +static void addstring(bool init, char *s, int l); +static void addchar(bool init, char s); +static int checkSpecialVal(void); /* examine scanstring for the special + * value */ + +static void parseUnicode(char *s, int l); +static void parseHexChars(char *s, int l); + +/* Avoid exit() on fatal scanner errors (a bit ugly -- see 
yy_fatal_error) */ +#undef fprintf +#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg) + +static void +fprintf_to_ereport(const char *fmt, const char *msg) +{ + ereport(ERROR, (errmsg_internal("%s", msg))); +} + +#define yyerror jsonpath_yyerror +%} + +%option 8bit +%option never-interactive +%option nodefault +%option noinput +%option nounput +%option noyywrap +%option warn +%option prefix="jsonpath_yy" +%option bison-bridge +%option noyyalloc +%option noyyrealloc +%option noyyfree + +%x xQUOTED +%x xNONQUOTED +%x xVARQUOTED +%x xSINGLEQUOTED +%x xCOMMENT + +special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] +any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] +blank [ \t\n\r\f] +hex_dig [0-9A-Fa-f] +unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) +hex_char \\x{hex_dig}{2} + + +%% + +<INITIAL>\&\& { return AND_P; } + +<INITIAL>\|\| { return OR_P; } + +<INITIAL>\! { return NOT_P; } + +<INITIAL>\*\* { return ANY_P; } + +<INITIAL>\< { return LESS_P; } + +<INITIAL>\<\= { return LESSEQUAL_P; } + +<INITIAL>\=\= { return EQUAL_P; } + +<INITIAL>\<\> { return NOTEQUAL_P; } + +<INITIAL>\!\= { return NOTEQUAL_P; } + +<INITIAL>\>\= { return GREATEREQUAL_P; } + +<INITIAL>\> { return GREATER_P; } + +<INITIAL>\${any}+ { + addstring(true, yytext + 1, yyleng - 1); + addchar(false, '\0'); + yylval->str = scanstring; + return VARIABLE_P; + } + +<INITIAL>\$\" { + addchar(true, '\0'); + BEGIN xVARQUOTED; + } + +<INITIAL>{special} { return *yytext; } + +<INITIAL>{blank}+ { /* ignore */ } + +<INITIAL>\/\* { + addchar(true, '\0'); + BEGIN xCOMMENT; + } + +<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ { + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return NUMERIC_P; + } + +<INITIAL>\.[0-9]+[eE][+-]?[0-9]+ /* float */ { + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return NUMERIC_P; + } + +<INITIAL>([0-9]+)?\.[0-9]+ { + addstring(true, yytext, yyleng); + 
addchar(false, '\0'); + yylval->str = scanstring; + return NUMERIC_P; + } + +<INITIAL>[0-9]+ { + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return INT_P; + } + +<INITIAL>{any}+ { + addstring(true, yytext, yyleng); + BEGIN xNONQUOTED; + } + +<INITIAL>\" { + addchar(true, '\0'); + BEGIN xQUOTED; + } + +<INITIAL>\' { + addchar(true, '\0'); + BEGIN xSINGLEQUOTED; + } + +<INITIAL>\\ { + yyless(0); + addchar(true, '\0'); + BEGIN xNONQUOTED; + } + +<xNONQUOTED>{any}+ { + addstring(false, yytext, yyleng); + } + +<xNONQUOTED>{blank}+ { + yylval->str = scanstring; + BEGIN INITIAL; + return checkSpecialVal(); + } + + +<xNONQUOTED>\/\* { + yylval->str = scanstring; + BEGIN xCOMMENT; + } + +<xNONQUOTED>({special}|\"|\') { + yylval->str = scanstring; + yyless(0); + BEGIN INITIAL; + return checkSpecialVal(); + } + +<xNONQUOTED><<EOF>> { + yylval->str = scanstring; + BEGIN INITIAL; + return checkSpecialVal(); + } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\] { addchar(false, yytext[1]); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b { addchar(false, '\b'); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f { addchar(false, '\f'); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n { addchar(false, '\n'); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r { addchar(false, '\r'); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t { addchar(false, '\t'); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v { addchar(false, '\v'); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+ { parseUnicode(yytext, yyleng); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+ { parseHexChars(yytext, yyleng); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x { yyerror(NULL, "Hex character sequence is invalid"); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u { yyerror(NULL, "Unicode sequence is invalid"); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\. 
{ yyerror(NULL, "Escape sequence is invalid"); } + +<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\ { yyerror(NULL, "Unexpected end after backslash"); } + +<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>> { yyerror(NULL, "Unexpected end of quoted string"); } + +<xQUOTED>\" { + yylval->str = scanstring; + BEGIN INITIAL; + return STRING_P; + } + +<xVARQUOTED>\" { + yylval->str = scanstring; + BEGIN INITIAL; + return VARIABLE_P; + } + +<xSINGLEQUOTED>\' { + yylval->str = scanstring; + BEGIN INITIAL; + return STRING_P; + } + +<xQUOTED,xVARQUOTED>[^\\\"]+ { addstring(false, yytext, yyleng); } + +<xSINGLEQUOTED>[^\\\']+ { addstring(false, yytext, yyleng); } + +<INITIAL><<EOF>> { yyterminate(); } + +<xCOMMENT>\*\/ { BEGIN INITIAL; } + +<xCOMMENT>[^\*]+ { } + +<xCOMMENT>\* { } + +<xCOMMENT><<EOF>> { yyerror(NULL, "Unexpected end of comment"); } + +%% + +void +jsonpath_yyerror(JsonPathParseResult **result, const char *message) +{ + if (*yytext == YY_END_OF_BUFFER_CHAR) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("bad jsonpath representation"), + /* translator: %s is typically "syntax error" */ + errdetail("%s at end of input", message))); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("bad jsonpath representation"), + /* translator: first %s is typically "syntax error" */ + errdetail("%s at or near \"%s\"", message, yytext))); + } +} + +typedef struct keyword +{ + int16 len; + bool lowercase; + int val; + char *keyword; +} keyword; + +/* + * Array of key words should be sorted by length and then + * alphabetical order + */ + +static keyword keywords[] = { + { 2, false, IS_P, "is"}, + { 2, false, TO_P, "to"}, + { 3, false, ABS_P, "abs"}, + { 3, false, LAX_P, "lax"}, + { 4, false, FLAG_P, "flag"}, + { 4, false, LAST_P, "last"}, + { 4, true, NULL_P, "null"}, + { 4, false, SIZE_P, "size"}, + { 4, true, TRUE_P, "true"}, + { 4, false, TYPE_P, "type"}, + { 4, false, WITH_P, "with"}, + { 5, true, FALSE_P, "false"}, + { 5, false, FLOOR_P, 
"floor"}, + { 6, false, DOUBLE_P, "double"}, + { 6, false, EXISTS_P, "exists"}, + { 6, false, STARTS_P, "starts"}, + { 6, false, STRICT_P, "strict"}, + { 7, false, CEILING_P, "ceiling"}, + { 7, false, UNKNOWN_P, "unknown"}, + { 8, false, KEYVALUE_P, "keyvalue"}, + { 10,false, LIKE_REGEX_P, "like_regex"}, +}; + +static int +checkSpecialVal() +{ + int res = IDENT_P; + int diff; + keyword *StopLow = keywords, + *StopHigh = keywords + lengthof(keywords), + *StopMiddle; + + if (scanstring.len > keywords[lengthof(keywords) - 1].len) + return res; + + while(StopLow < StopHigh) + { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + + if (StopMiddle->len == scanstring.len) + diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val, + scanstring.len); + else + diff = StopMiddle->len - scanstring.len; + + if (diff < 0) + StopLow = StopMiddle + 1; + else if (diff > 0) + StopHigh = StopMiddle; + else + { + if (StopMiddle->lowercase) + diff = strncmp(StopMiddle->keyword, scanstring.val, + scanstring.len); + + if (diff == 0) + res = StopMiddle->val; + + break; + } + } + + return res; +} + +/* + * Called before any actual parsing is done + */ +static void +jsonpath_scanner_init(const char *str, int slen) +{ + if (slen <= 0) + slen = strlen(str); + + /* + * Might be left over after ereport() + */ + yy_init_globals(); + + /* + * Make a scan buffer with special termination needed by flex. 
+ */ + + scanbuflen = slen; + scanbuf = palloc(slen + 2); + memcpy(scanbuf, str, slen); + scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; + scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); + + BEGIN(INITIAL); +} + + +/* + * Called after parsing is done to clean up after jsonpath_scanner_init() + */ +static void +jsonpath_scanner_finish(void) +{ + yy_delete_buffer(scanbufhandle); + pfree(scanbuf); +} + +static void +addstring(bool init, char *s, int l) +{ + if (init) + { + scanstring.total = 32; + scanstring.val = palloc(scanstring.total); + scanstring.len = 0; + } + + if (s && l) + { + while(scanstring.len + l + 1 >= scanstring.total) + { + scanstring.total *= 2; + scanstring.val = repalloc(scanstring.val, scanstring.total); + } + + memcpy(scanstring.val + scanstring.len, s, l); + scanstring.len += l; + } +} + +static void +addchar(bool init, char s) +{ + if (init) + { + scanstring.total = 32; + scanstring.val = palloc(scanstring.total); + scanstring.len = 0; + } + else if(scanstring.len + 1 >= scanstring.total) + { + scanstring.total *= 2; + scanstring.val = repalloc(scanstring.val, scanstring.total); + } + + scanstring.val[ scanstring.len ] = s; + if (s != '\0') + scanstring.len++; +} + +JsonPathParseResult * +parsejsonpath(const char *str, int len) +{ + JsonPathParseResult *parseresult; + + jsonpath_scanner_init(str, len); + + if (jsonpath_yyparse((void*)&parseresult) != 0) + jsonpath_yyerror(NULL, "bugus input"); + + jsonpath_scanner_finish(); + + return parseresult; +} + +static int +hexval(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 0xA; + if (c >= 'A' && c <= 'F') + return c - 'A' + 0xA; + elog(ERROR, "invalid hexadecimal digit"); + return 0; /* not reached */ +} + +static void +addUnicodeChar(int ch) +{ + /* + * For UTF8, replace the escape sequence by the actual + * utf8 character in lex->strval. 
Do this also for other + * encodings if the escape designates an ASCII character, + * otherwise raise an error. + */ + + if (ch == 0) + { + /* We can't allow this, since our TEXT type doesn't */ + ereport(ERROR, + (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), + errmsg("unsupported Unicode escape sequence"), + errdetail("\\u0000 cannot be converted to text."))); + } + else if (GetDatabaseEncoding() == PG_UTF8) + { + char utf8str[5]; + int utf8len; + + unicode_to_utf8(ch, (unsigned char *) utf8str); + utf8len = pg_utf_mblen((unsigned char *) utf8str); + addstring(false, utf8str, utf8len); + } + else if (ch <= 0x007f) + { + /* + * This is the only way to designate things like a + * form feed character in JSON, so it's useful in all + * encodings. + */ + addchar(false, (char) ch); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type jsonpath"), + errdetail("Unicode escape values cannot be used for code " + "point values above 007F when the server encoding " + "is not UTF8."))); + } +} + +static void +addUnicode(int ch, int *hi_surrogate) +{ + if (ch >= 0xd800 && ch <= 0xdbff) + { + if (*hi_surrogate != -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type jsonpath"), + errdetail("Unicode high surrogate must not follow " + "a high surrogate."))); + *hi_surrogate = (ch & 0x3ff) << 10; + return; + } + else if (ch >= 0xdc00 && ch <= 0xdfff) + { + if (*hi_surrogate == -1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type jsonpath"), + errdetail("Unicode low surrogate must follow a high " + "surrogate."))); + ch = 0x10000 + *hi_surrogate + (ch & 0x3ff); + *hi_surrogate = -1; + } + else if (*hi_surrogate != -1) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type jsonpath"), + errdetail("Unicode low surrogate must follow a high " + 
"surrogate."))); + } + + addUnicodeChar(ch); +} + +/* + * parseUnicode was adopted from json_lex_string() in + * src/backend/utils/adt/json.c + */ +static void +parseUnicode(char *s, int l) +{ + int i; + int hi_surrogate = -1; + + for (i = 2; i < l; i += 2) /* skip '\u' */ + { + int ch = 0; + int j; + + if (s[i] == '{') /* parse '\u{XX...}' */ + { + while (s[++i] != '}' && i < l) + ch = (ch << 4) | hexval(s[i]); + i++; /* ski p '}' */ + } + else /* parse '\uXXXX' */ + { + for (j = 0; j < 4 && i < l; j++) + ch = (ch << 4) | hexval(s[i++]); + } + + addUnicode(ch, &hi_surrogate); + } + + if (hi_surrogate != -1) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type jsonpath"), + errdetail("Unicode low surrogate must follow a high " + "surrogate."))); + } +} + +static void +parseHexChars(char *s, int l) +{ + int i; + + Assert(l % 4 /* \xXX */ == 0); + + for (i = 0; i < l / 4; i++) + { + int ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]); + + addUnicodeChar(ch); + } +} + +/* + * Interface functions to make flex use palloc() instead of malloc(). + * It'd be better to make these static, but flex insists otherwise. + */ + +void * +jsonpath_yyalloc(yy_size_t bytes) +{ + return palloc(bytes); +} + +void * +jsonpath_yyrealloc(void *ptr, yy_size_t bytes) +{ + if (ptr) + return repalloc(ptr, bytes); + else + return palloc(bytes); +} + +void +jsonpath_yyfree(void *ptr) +{ + if (ptr) + pfree(ptr); +} + diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 4ef8a9290ae..da13a875eb0 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -133,7 +133,7 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx); * Pattern is given in the database encoding. We internally convert to * an array of pg_wchar, which is what Spencer's regex package wants. 
*/ -static regex_t * +regex_t * RE_compile_and_cache(text *text_re, int cflags, Oid collation) { int text_re_len = VARSIZE_ANY_EXHDR(text_re); @@ -339,7 +339,7 @@ RE_execute(regex_t *re, char *dat, int dat_len, * Both pattern and data are given in the database encoding. We internally * convert to array of pg_wchar which is what Spencer's regex package wants. */ -static bool +bool RE_compile_and_execute(text *text_re, char *dat, int dat_len, int cflags, Oid collation, int nmatch, regmatch_t *pmatch) diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt index 4f7b9b6e5c9..16f5ca233a9 100644 --- a/src/backend/utils/errcodes.txt +++ b/src/backend/utils/errcodes.txt @@ -206,6 +206,21 @@ Section: Class 22 - Data Exception 2200N E ERRCODE_INVALID_XML_CONTENT invalid_xml_content 2200S E ERRCODE_INVALID_XML_COMMENT invalid_xml_comment 2200T E ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION invalid_xml_processing_instruction +22030 E ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE duplicate_json_object_key_value +22032 E ERRCODE_INVALID_JSON_TEXT invalid_json_text +22033 E ERRCODE_INVALID_JSON_SUBSCRIPT invalid_json_subscript +22034 E ERRCODE_MORE_THAN_ONE_JSON_ITEM more_than_one_json_item +22035 E ERRCODE_NO_JSON_ITEM no_json_item +22036 E ERRCODE_NON_NUMERIC_JSON_ITEM non_numeric_json_item +22037 E ERRCODE_NON_UNIQUE_KEYS_IN_JSON_OBJECT non_unique_keys_in_json_object +22038 E ERRCODE_SINGLETON_JSON_ITEM_REQUIRED singleton_json_item_required +22039 E ERRCODE_JSON_ARRAY_NOT_FOUND json_array_not_found +2203A E ERRCODE_JSON_MEMBER_NOT_FOUND json_member_not_found +2203B E ERRCODE_JSON_NUMBER_NOT_FOUND json_number_not_found +2203C E ERRCODE_JSON_OBJECT_NOT_FOUND json_object_not_found +2203F E ERRCODE_JSON_SCALAR_REQUIRED json_scalar_required +2203D E ERRCODE_TOO_MANY_JSON_ARRAY_ELEMENTS too_many_json_array_elements +2203E E ERRCODE_TOO_MANY_JSON_OBJECT_MEMBERS too_many_json_object_members Section: Class 23 - Integrity Constraint Violation |