diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2020-12-09 12:40:37 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2020-12-09 12:40:37 -0500 |
commit | c7aba7c14efdbd9fc1bb44b4cb83bedee0c6a6fc (patch) | |
tree | d6980ca2951d353475957a56b58866cd4fafcdd3 /src/backend/parser | |
parent | 8b069ef5dca97cd737a5fd64c420df3cd61ec1c9 (diff) | |
download | postgresql-c7aba7c14efdbd9fc1bb44b4cb83bedee0c6a6fc.tar.gz postgresql-c7aba7c14efdbd9fc1bb44b4cb83bedee0c6a6fc.zip |
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc.
of a SubscriptingRef expression node, and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
Diffstat (limited to 'src/backend/parser')
-rw-r--r-- | src/backend/parser/parse_coerce.c | 5 | ||||
-rw-r--r-- | src/backend/parser/parse_collate.c | 23 | ||||
-rw-r--r-- | src/backend/parser/parse_expr.c | 6 | ||||
-rw-r--r-- | src/backend/parser/parse_node.c | 231 | ||||
-rw-r--r-- | src/backend/parser/parse_target.c | 48 |
5 files changed, 119 insertions, 194 deletions
diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c index a2924e3d1ce..da6c3ae4b5f 100644 --- a/src/backend/parser/parse_coerce.c +++ b/src/backend/parser/parse_coerce.c @@ -26,6 +26,7 @@ #include "parser/parse_type.h" #include "utils/builtins.h" #include "utils/datum.h" /* needed for datumIsEqual() */ +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -2854,8 +2855,8 @@ find_typmod_coercion_function(Oid typeId, targetType = typeidType(typeId); typeForm = (Form_pg_type) GETSTRUCT(targetType); - /* Check for a varlena array type */ - if (typeForm->typelem != InvalidOid && typeForm->typlen == -1) + /* Check for a "true" array type */ + if (IsTrueArrayType(typeForm)) { /* Yes, switch our attention to the element type */ typeId = typeForm->typelem; diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index bf800f5937b..13e62a20156 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -667,6 +667,29 @@ assign_collations_walker(Node *node, assign_collations_context *context) &loccontext); } break; + case T_SubscriptingRef: + { + /* + * The subscripts are treated as independent + * expressions not contributing to the node's + * collation. Only the container, and the source + * expression if any, contribute. (This models + * the old behavior, in which the subscripts could + * be counted on to be integers and thus not + * contribute anything.) 
+ */ + SubscriptingRef *sbsref = (SubscriptingRef *) node; + + assign_expr_collations(context->pstate, + (Node *) sbsref->refupperindexpr); + assign_expr_collations(context->pstate, + (Node *) sbsref->reflowerindexpr); + (void) assign_collations_walker((Node *) sbsref->refexpr, + &loccontext); + (void) assign_collations_walker((Node *) sbsref->refassgnexpr, + &loccontext); + } + break; default: /* diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 1e62d31aca7..ffc96e2a6fa 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -406,10 +406,9 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) result = (Node *) transformContainerSubscripts(pstate, result, exprType(result), - InvalidOid, exprTypmod(result), subscripts, - NULL); + false); subscripts = NIL; newresult = ParseFuncOrColumn(pstate, @@ -429,10 +428,9 @@ transformIndirection(ParseState *pstate, A_Indirection *ind) result = (Node *) transformContainerSubscripts(pstate, result, exprType(result), - InvalidOid, exprTypmod(result), subscripts, - NULL); + false); return result; } diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index 6e98fe55fc4..e90f6c9d010 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -20,6 +20,7 @@ #include "mb/pg_wchar.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/subscripting.h" #include "parser/parse_coerce.h" #include "parser/parse_expr.h" #include "parser/parse_relation.h" @@ -182,23 +183,16 @@ pcb_error_callback(void *arg) /* * transformContainerType() - * Identify the types involved in a subscripting operation for container + * Identify the actual container type for a subscripting operation. * - * - * On entry, containerType/containerTypmod identify the type of the input value - * to be subscripted (which could be a domain type). 
These are modified if - * necessary to identify the actual container type and typmod, and the - * container's element type is returned. An error is thrown if the input isn't - * an array type. + * containerType/containerTypmod are modified if necessary to identify + * the actual container type and typmod. This mainly involves smashing + * any domain to its base type, but there are some special considerations. + * Note that caller still needs to check if the result type is a container. */ -Oid +void transformContainerType(Oid *containerType, int32 *containerTypmod) { - Oid origContainerType = *containerType; - Oid elementType; - HeapTuple type_tuple_container; - Form_pg_type type_struct_container; - /* * If the input is a domain, smash to base type, and extract the actual * typmod to be applied to the base type. Subscripting a domain is an @@ -209,35 +203,16 @@ transformContainerType(Oid *containerType, int32 *containerTypmod) *containerType = getBaseTypeAndTypmod(*containerType, containerTypmod); /* - * Here is an array specific code. We treat int2vector and oidvector as - * though they were domains over int2[] and oid[]. This is needed because - * array slicing could create an array that doesn't satisfy the - * dimensionality constraints of the xxxvector type; so we want the result - * of a slice operation to be considered to be of the more general type. + * We treat int2vector and oidvector as though they were domains over + * int2[] and oid[]. This is needed because array slicing could create an + * array that doesn't satisfy the dimensionality constraints of the + * xxxvector type; so we want the result of a slice operation to be + * considered to be of the more general type. 
*/ if (*containerType == INT2VECTOROID) *containerType = INT2ARRAYOID; else if (*containerType == OIDVECTOROID) *containerType = OIDARRAYOID; - - /* Get the type tuple for the container */ - type_tuple_container = SearchSysCache1(TYPEOID, ObjectIdGetDatum(*containerType)); - if (!HeapTupleIsValid(type_tuple_container)) - elog(ERROR, "cache lookup failed for type %u", *containerType); - type_struct_container = (Form_pg_type) GETSTRUCT(type_tuple_container); - - /* needn't check typisdefined since this will fail anyway */ - - elementType = type_struct_container->typelem; - if (elementType == InvalidOid) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("cannot subscript type %s because it is not an array", - format_type_be(origContainerType)))); - - ReleaseSysCache(type_tuple_container); - - return elementType; } /* @@ -249,13 +224,14 @@ transformContainerType(Oid *containerType, int32 *containerTypmod) * an expression that represents the result of extracting a single container * element or a container slice. * - * In a container assignment, we are given a destination container value plus a - * source value that is to be assigned to a single element or a slice of that - * container. We produce an expression that represents the new container value - * with the source data inserted into the right part of the container. + * Container assignments are treated basically the same as container fetches + * here. The caller will modify the result node to insert the source value + * that is to be assigned to the element or slice that a fetch would have + * retrieved. The execution result will be a new container value with + * the source value inserted into the right part of the container. 
* - * For both cases, if the source container is of a domain-over-array type, - * the result is of the base array type or its element type; essentially, + * For both cases, if the source is of a domain-over-container type, the + * result is the same as if it had been of the container type; essentially, * we must fold a domain to its base type before applying subscripting. * (Note that int2vector and oidvector are treated as domains here.) * @@ -264,48 +240,48 @@ transformContainerType(Oid *containerType, int32 *containerTypmod) * containerType OID of container's datatype (should match type of * containerBase, or be the base type of containerBase's * domain type) - * elementType OID of container's element type (fetch with - * transformContainerType, or pass InvalidOid to do it here) - * containerTypMod typmod for the container (which is also typmod for the - * elements) + * containerTypMod typmod for the container * indirection Untransformed list of subscripts (must not be NIL) - * assignFrom NULL for container fetch, else transformed expression for - * source. + * isAssignment True if this will become a container assignment. */ SubscriptingRef * transformContainerSubscripts(ParseState *pstate, Node *containerBase, Oid containerType, - Oid elementType, int32 containerTypMod, List *indirection, - Node *assignFrom) + bool isAssignment) { + SubscriptingRef *sbsref; + const SubscriptRoutines *sbsroutines; + Oid elementType; bool isSlice = false; - List *upperIndexpr = NIL; - List *lowerIndexpr = NIL; ListCell *idx; - SubscriptingRef *sbsref; /* - * Caller may or may not have bothered to determine elementType. Note - * that if the caller did do so, containerType/containerTypMod must be as - * modified by transformContainerType, ie, smash domain to base type. + * Determine the actual container type, smashing any domain. In the + * assignment case the caller already did this, since it also needs to + * know the actual container type. 
*/ - if (!OidIsValid(elementType)) - elementType = transformContainerType(&containerType, &containerTypMod); + if (!isAssignment) + transformContainerType(&containerType, &containerTypMod); /* + * Verify that the container type is subscriptable, and get its support + * functions and typelem. + */ + sbsroutines = getSubscriptingRoutines(containerType, &elementType); + + /* + * Detect whether any of the indirection items are slice specifiers. + * * A list containing only simple subscripts refers to a single container * element. If any of the items are slice specifiers (lower:upper), then - * the subscript expression means a container slice operation. In this - * case, we convert any non-slice items to slices by treating the single - * subscript as the upper bound and supplying an assumed lower bound of 1. - * We have to prescan the list to see if there are any slice items. + * the subscript expression means a container slice operation. */ foreach(idx, indirection) { - A_Indices *ai = (A_Indices *) lfirst(idx); + A_Indices *ai = lfirst_node(A_Indices, idx); if (ai->is_slice) { @@ -315,120 +291,35 @@ transformContainerSubscripts(ParseState *pstate, } /* - * Transform the subscript expressions. 
- */ - foreach(idx, indirection) - { - A_Indices *ai = lfirst_node(A_Indices, idx); - Node *subexpr; - - if (isSlice) - { - if (ai->lidx) - { - subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind); - /* If it's not int4 already, try to coerce */ - subexpr = coerce_to_target_type(pstate, - subexpr, exprType(subexpr), - INT4OID, -1, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, - -1); - if (subexpr == NULL) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array subscript must have type integer"), - parser_errposition(pstate, exprLocation(ai->lidx)))); - } - else if (!ai->is_slice) - { - /* Make a constant 1 */ - subexpr = (Node *) makeConst(INT4OID, - -1, - InvalidOid, - sizeof(int32), - Int32GetDatum(1), - false, - true); /* pass by value */ - } - else - { - /* Slice with omitted lower bound, put NULL into the list */ - subexpr = NULL; - } - lowerIndexpr = lappend(lowerIndexpr, subexpr); - } - else - Assert(ai->lidx == NULL && !ai->is_slice); - - if (ai->uidx) - { - subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind); - /* If it's not int4 already, try to coerce */ - subexpr = coerce_to_target_type(pstate, - subexpr, exprType(subexpr), - INT4OID, -1, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, - -1); - if (subexpr == NULL) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array subscript must have type integer"), - parser_errposition(pstate, exprLocation(ai->uidx)))); - } - else - { - /* Slice with omitted upper bound, put NULL into the list */ - Assert(isSlice && ai->is_slice); - subexpr = NULL; - } - upperIndexpr = lappend(upperIndexpr, subexpr); - } - - /* - * If doing an array store, coerce the source value to the right type. - * (This should agree with the coercion done by transformAssignedExpr.) - */ - if (assignFrom != NULL) - { - Oid typesource = exprType(assignFrom); - Oid typeneeded = isSlice ? 
containerType : elementType; - Node *newFrom; - - newFrom = coerce_to_target_type(pstate, - assignFrom, typesource, - typeneeded, containerTypMod, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, - -1); - if (newFrom == NULL) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array assignment requires type %s" - " but expression is of type %s", - format_type_be(typeneeded), - format_type_be(typesource)), - errhint("You will need to rewrite or cast the expression."), - parser_errposition(pstate, exprLocation(assignFrom)))); - assignFrom = newFrom; - } - - /* * Ready to build the SubscriptingRef node. */ - sbsref = (SubscriptingRef *) makeNode(SubscriptingRef); - if (assignFrom != NULL) - sbsref->refassgnexpr = (Expr *) assignFrom; + sbsref = makeNode(SubscriptingRef); sbsref->refcontainertype = containerType; sbsref->refelemtype = elementType; + /* refrestype is to be set by container-specific logic */ sbsref->reftypmod = containerTypMod; /* refcollid will be set by parse_collate.c */ - sbsref->refupperindexpr = upperIndexpr; - sbsref->reflowerindexpr = lowerIndexpr; + /* refupperindexpr, reflowerindexpr are to be set by container logic */ sbsref->refexpr = (Expr *) containerBase; - sbsref->refassgnexpr = (Expr *) assignFrom; + sbsref->refassgnexpr = NULL; /* caller will fill if it's an assignment */ + + /* + * Call the container-type-specific logic to transform the subscripts and + * determine the subscripting result type. + */ + sbsroutines->transform(sbsref, indirection, pstate, + isSlice, isAssignment); + + /* + * Verify we got a valid type (this defends, for example, against someone + * using array_subscript_handler as typsubscript without setting typelem). 
+ */ + if (!OidIsValid(sbsref->refrestype)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("cannot subscript type %s because it does not support subscripting", + format_type_be(containerType)))); return sbsref; } diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index ce68663cc2c..3dda8e2847d 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -861,7 +861,7 @@ transformAssignmentIndirection(ParseState *pstate, if (targetIsSubscripting) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("array assignment to \"%s\" requires type %s" + errmsg("subscripted assignment to \"%s\" requires type %s" " but expression is of type %s", targetName, format_type_be(targetTypeId), @@ -901,26 +901,37 @@ transformAssignmentSubscripts(ParseState *pstate, int location) { Node *result; + SubscriptingRef *sbsref; Oid containerType; int32 containerTypMod; - Oid elementTypeId; Oid typeNeeded; + int32 typmodNeeded; Oid collationNeeded; Assert(subscripts != NIL); - /* Identify the actual array type and element type involved */ + /* Identify the actual container type involved */ containerType = targetTypeId; containerTypMod = targetTypMod; - elementTypeId = transformContainerType(&containerType, &containerTypMod); + transformContainerType(&containerType, &containerTypMod); - /* Identify type that RHS must provide */ - typeNeeded = isSlice ? containerType : elementTypeId; + /* Process subscripts and identify required type for RHS */ + sbsref = transformContainerSubscripts(pstate, + basenode, + containerType, + containerTypMod, + subscripts, + true); + + typeNeeded = sbsref->refrestype; + typmodNeeded = sbsref->reftypmod; /* - * container normally has same collation as elements, but there's an - * exception: we might be subscripting a domain over a container type. In - * that case use collation of the base type. 
+ * Container normally has same collation as its elements, but there's an + * exception: we might be subscripting a domain over a container type. In + * that case use collation of the base type. (This is shaky for arbitrary + * subscripting semantics, but it doesn't matter all that much since we + * only use this to label the collation of a possible CaseTestExpr.) */ if (containerType == targetTypeId) collationNeeded = targetCollation; @@ -933,21 +944,22 @@ transformAssignmentSubscripts(ParseState *pstate, targetName, true, typeNeeded, - containerTypMod, + typmodNeeded, collationNeeded, indirection, next_indirection, rhs, location); - /* process subscripts */ - result = (Node *) transformContainerSubscripts(pstate, - basenode, - containerType, - elementTypeId, - containerTypMod, - subscripts, - rhs); + /* + * Insert the already-properly-coerced RHS into the SubscriptingRef. Then + * set refrestype and reftypmod back to the container type's values. + */ + sbsref->refassgnexpr = (Expr *) rhs; + sbsref->refrestype = containerType; + sbsref->reftypmod = containerTypMod; + + result = (Node *) sbsref; /* If target was a domain over container, need to coerce up to the domain */ if (containerType != targetTypeId) |