aboutsummaryrefslogtreecommitdiff
path: root/src/backend/parser/parse_collate.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/parser/parse_collate.c')
-rw-r--r--src/backend/parser/parse_collate.c763
1 files changed, 763 insertions, 0 deletions
diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c
new file mode 100644
index 00000000000..0b77e3ea2b7
--- /dev/null
+++ b/src/backend/parser/parse_collate.c
@@ -0,0 +1,763 @@
+/*-------------------------------------------------------------------------
+ *
+ * parse_collate.c
+ * Routines for assigning collation information.
+ *
+ * We choose to handle collation analysis in a post-pass over the output
+ * of expression parse analysis. This is because we need more state to
+ * perform this processing than is needed in the finished tree. If we
+ * did it on-the-fly while building the tree, all that state would have
+ * to be kept in expression node trees permanently. This way, the extra
+ * storage is just local variables in this recursive routine.
+ *
+ * The info that is actually saved in the finished tree is:
+ * 1. The output collation of each expression node, or InvalidOid if it
+ * returns a noncollatable data type. This can also be InvalidOid if the
+ * result type is collatable but the collation is indeterminate.
+ * 2. The collation to be used in executing each function. InvalidOid means
+ * that there are no collatable inputs or their collation is indeterminate.
+ * This value is only stored in node types that might call collation-using
+ * functions.
+ *
+ * You might think we could get away with storing only one collation per
+ * node, but the two concepts really need to be kept distinct. Otherwise
+ * it's too confusing when a function produces a collatable output type but
+ * has no collatable inputs or produces noncollatable output from collatable
+ * inputs.
+ *
+ * Cases with indeterminate collation might result in an error being thrown
+ * at runtime. If we knew exactly which functions require collation
+ * information, we could throw those errors at parse time instead.
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/parser/parse_collate.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_collate.h"
+#include "utils/lsyscache.h"
+
+
+/*
+ * Collation strength (the SQL standard calls this "derivation"). Order is
+ * chosen to allow comparisons to work usefully. Note: the standard doesn't
+ * seem to distingish between NONE and CONFLICT.
+ */
+typedef enum
+{
+ COLLATE_NONE, /* expression is of a noncollatable datatype */
+ COLLATE_IMPLICIT, /* collation was derived implicitly */
+ COLLATE_CONFLICT, /* we had a conflict of implicit collations */
+ COLLATE_EXPLICIT /* collation was derived explicitly */
+} CollateStrength;
+
+typedef struct
+{
+ ParseState *pstate; /* parse state (for error reporting) */
+ Oid collation; /* OID of current collation, if any */
+ CollateStrength strength; /* strength of current collation choice */
+ int location; /* location of expr that set collation */
+ /* Remaining fields are only valid when strength == COLLATE_CONFLICT */
+ Oid collation2; /* OID of conflicting collation */
+ int location2; /* location of expr that set collation2 */
+} assign_collations_context;
+
+static bool assign_query_collations_walker(Node *node, ParseState *pstate);
+static bool assign_collations_walker(Node *node,
+ assign_collations_context *context);
+
+
+/*
+ * assign_query_collations()
+ * Mark all expressions in the given Query with collation information.
+ *
+ * This should be applied to each Query after completion of parse analysis
+ * for expressions. Note that we do not recurse into sub-Queries, since
+ * those should have been processed when built.
+ */
+void
+assign_query_collations(ParseState *pstate, Query *query)
+{
+ /*
+ * We just use query_tree_walker() to visit all the contained expressions.
+ * We can skip the rangetable and CTE subqueries, though, since RTEs and
+ * subqueries had better have been processed already (else Vars referring
+ * to them would not get created with the right collation).
+ */
+ (void) query_tree_walker(query,
+ assign_query_collations_walker,
+ (void *) pstate,
+ QTW_IGNORE_RANGE_TABLE |
+ QTW_IGNORE_CTE_SUBQUERIES);
+}
+
+/*
+ * Walker for assign_query_collations
+ *
+ * Each expression found by query_tree_walker is processed independently.
+ * Note that query_tree_walker may pass us a whole List, such as the
+ * targetlist, in which case each subexpression must be processed
+ * independently --- we don't want to bleat if two different targetentries
+ * have different collations.
+ */
+static bool
+assign_query_collations_walker(Node *node, ParseState *pstate)
+{
+ /* Need do nothing for empty subexpressions */
+ if (node == NULL)
+ return false;
+
+ /*
+ * We don't want to recurse into a set-operations tree; it's already
+ * been fully processed in transformSetOperationStmt.
+ */
+ if (IsA(node, SetOperationStmt))
+ return false;
+
+ if (IsA(node, List))
+ assign_list_collations(pstate, (List *) node);
+ else
+ assign_expr_collations(pstate, node);
+
+ return false;
+}
+
+/*
+ * assign_list_collations()
+ * Mark all nodes in the list of expressions with collation information.
+ *
+ * The list member expressions are processed independently; they do not have
+ * to share a common collation.
+ */
+void
+assign_list_collations(ParseState *pstate, List *exprs)
+{
+ ListCell *lc;
+
+ foreach(lc, exprs)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ assign_expr_collations(pstate, node);
+ }
+}
+
+/*
+ * assign_expr_collations()
+ * Mark all nodes in the given expression tree with collation information.
+ *
+ * This is exported for the benefit of various utility commands that process
+ * expressions without building a complete Query. It should be applied after
+ * calling transformExpr() plus any expression-modifying operations such as
+ * coerce_to_boolean().
+ */
+void
+assign_expr_collations(ParseState *pstate, Node *expr)
+{
+ assign_collations_context context;
+
+ /* initialize context for tree walk */
+ context.pstate = pstate;
+ context.collation = InvalidOid;
+ context.strength = COLLATE_NONE;
+ context.location = -1;
+
+ /* and away we go */
+ (void) assign_collations_walker(expr, &context);
+}
+
+/*
+ * select_common_collation()
+ * Identify a common collation for a list of expressions.
+ *
+ * The expressions should all return the same datatype, else this is not
+ * terribly meaningful.
+ *
+ * none_ok means that it is permitted to return InvalidOid, indicating that
+ * no common collation could be identified, even for collatable datatypes.
+ * Otherwise, an error is thrown for conflict of implicit collations.
+ *
+ * In theory, none_ok = true reflects the rules of SQL standard clause "Result
+ * of data type combinations", none_ok = false reflects the rules of clause
+ * "Collation determination" (in some cases invoked via "Grouping
+ * operations").
+ */
+Oid
+select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
+{
+ assign_collations_context context;
+
+ /* initialize context for tree walk */
+ context.pstate = pstate;
+ context.collation = InvalidOid;
+ context.strength = COLLATE_NONE;
+ context.location = -1;
+
+ /* and away we go */
+ (void) assign_collations_walker((Node *) exprs, &context);
+
+ /* deal with collation conflict */
+ if (context.strength == COLLATE_CONFLICT)
+ {
+ if (none_ok)
+ return InvalidOid;
+ ereport(ERROR,
+ (errcode(ERRCODE_COLLATION_MISMATCH),
+ errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
+ get_collation_name(context.collation),
+ get_collation_name(context.collation2)),
+ errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
+ parser_errposition(context.pstate, context.location2)));
+ }
+
+ /*
+ * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
+ * that's okay because it must mean none of the expressions returned
+ * collatable datatypes.
+ */
+ return context.collation;
+}
+
+/*
+ * assign_collations_walker()
+ * Recursive guts of collation processing.
+ *
+ * Nodes with no children (eg, Vars, Consts, Params) must have been marked
+ * when built. All upper-level nodes are marked here.
+ *
+ * Note: if this is invoked directly on a List, it will attempt to infer a
+ * common collation for all the list members. In particular, it will throw
+ * error if there are conflicting explicit collations for different members.
+ */
+static bool
+assign_collations_walker(Node *node, assign_collations_context *context)
+{
+ assign_collations_context loccontext;
+ Oid collation;
+ CollateStrength strength;
+ int location;
+
+ /* Need do nothing for empty subexpressions */
+ if (node == NULL)
+ return false;
+
+ /*
+ * Prepare for recursion. For most node types, though not all, the
+ * first thing we do is recurse to process all nodes below this one.
+ * Each level of the tree has its own local context.
+ */
+ loccontext.pstate = context->pstate;
+ loccontext.collation = InvalidOid;
+ loccontext.strength = COLLATE_NONE;
+ loccontext.location = -1;
+
+ /*
+ * Recurse if appropriate, then determine the collation for this node.
+ *
+ * Note: the general cases are at the bottom of the switch, after various
+ * special cases.
+ */
+ switch (nodeTag(node))
+ {
+ case T_CollateExpr:
+ {
+ /*
+ * COLLATE sets an explicitly derived collation, regardless of
+ * what the child state is. But we must recurse to set up
+ * collation info below here.
+ */
+ CollateExpr *expr = (CollateExpr *) node;
+
+ (void) expression_tree_walker(node,
+ assign_collations_walker,
+ (void *) &loccontext);
+
+ collation = expr->collOid;
+ Assert(OidIsValid(collation));
+ strength = COLLATE_EXPLICIT;
+ location = expr->location;
+ }
+ break;
+ case T_FieldSelect:
+ {
+ /*
+ * FieldSelect is a special case because the field may have
+ * a non-default collation, in which case we should use that.
+ * The field's collation was already looked up and saved
+ * in the node.
+ */
+ FieldSelect *expr = (FieldSelect *) node;
+
+ /* ... but first, recurse */
+ (void) expression_tree_walker(node,
+ assign_collations_walker,
+ (void *) &loccontext);
+
+ if (OidIsValid(expr->resultcollid))
+ {
+ /* Node's result type is collatable. */
+ if (expr->resultcollid == DEFAULT_COLLATION_OID)
+ {
+ /*
+ * The immediate input node necessarily yields a
+ * composite type, so it will have no exposed
+ * collation. However, if we are selecting a field
+ * from a function returning composite, see if we
+ * can bubble up a collation from the function's
+ * input. XXX this is a bit of a hack, rethink ...
+ */
+ if (IsA(expr->arg, FuncExpr))
+ {
+ FuncExpr *fexpr = (FuncExpr *) expr->arg;
+
+ if (OidIsValid(fexpr->inputcollid))
+ expr->resultcollid = fexpr->inputcollid;
+ }
+ }
+ /* Pass up field's collation as an implicit choice. */
+ collation = expr->resultcollid;
+ strength = COLLATE_IMPLICIT;
+ location = exprLocation(node);
+ }
+ else
+ {
+ /* Node's result type isn't collatable. */
+ collation = InvalidOid;
+ strength = COLLATE_NONE;
+ location = -1; /* won't be used */
+ }
+ }
+ break;
+ case T_CaseExpr:
+ {
+ /*
+ * CaseExpr is a special case because we do not want to
+ * recurse into the test expression (if any). It was
+ * already marked with collations during transformCaseExpr,
+ * and furthermore its collation is not relevant to the
+ * result of the CASE --- only the output expressions are.
+ * So we can't use expression_tree_walker here.
+ */
+ CaseExpr *expr = (CaseExpr *) node;
+ Oid typcollation;
+ ListCell *lc;
+
+ foreach(lc, expr->args)
+ {
+ CaseWhen *when = (CaseWhen *) lfirst(lc);
+
+ Assert(IsA(when, CaseWhen));
+ /*
+ * The condition expressions mustn't affect the CASE's
+ * result collation either; but since they are known to
+ * yield boolean, it's safe to recurse directly on them
+ * --- they won't change loccontext.
+ */
+ (void) assign_collations_walker((Node *) when->expr,
+ &loccontext);
+ (void) assign_collations_walker((Node *) when->result,
+ &loccontext);
+ }
+ (void) assign_collations_walker((Node *) expr->defresult,
+ &loccontext);
+
+ /*
+ * Now determine the CASE's output collation. This is the
+ * same as the general case below.
+ */
+ typcollation = get_typcollation(exprType(node));
+ if (OidIsValid(typcollation))
+ {
+ /* Node's result is collatable; what about its input? */
+ if (loccontext.strength > COLLATE_NONE)
+ {
+ /* Collation state bubbles up from children. */
+ collation = loccontext.collation;
+ strength = loccontext.strength;
+ location = loccontext.location;
+ }
+ else
+ {
+ /*
+ * Collatable output produced without any collatable
+ * input. Use the type's collation (which is usually
+ * DEFAULT_COLLATION_OID, but might be different for a
+ * domain).
+ */
+ collation = typcollation;
+ strength = COLLATE_IMPLICIT;
+ location = exprLocation(node);
+ }
+ }
+ else
+ {
+ /* Node's result type isn't collatable. */
+ collation = InvalidOid;
+ strength = COLLATE_NONE;
+ location = -1; /* won't be used */
+ }
+
+ /*
+ * Save the state into the expression node. We know it
+ * doesn't care about input collation.
+ */
+ if (strength == COLLATE_CONFLICT)
+ exprSetCollation(node, InvalidOid);
+ else
+ exprSetCollation(node, collation);
+ }
+ break;
+ case T_RowExpr:
+ {
+ /*
+ * RowExpr is a special case because the subexpressions
+ * are independent: we don't want to complain if some of
+ * them have incompatible explicit collations.
+ */
+ RowExpr *expr = (RowExpr *) node;
+
+ assign_list_collations(context->pstate, expr->args);
+
+ /*
+ * Since the result is always composite and therefore never
+ * has a collation, we can just stop here: this node has no
+ * impact on the collation of its parent.
+ */
+ return false; /* done */
+ }
+ case T_RowCompareExpr:
+ {
+ /*
+ * For RowCompare, we have to find the common collation of
+ * each pair of input columns and build a list. If we can't
+ * find a common collation, we just put InvalidOid into the
+ * list, which may or may not cause an error at runtime.
+ */
+ RowCompareExpr *expr = (RowCompareExpr *) node;
+ List *colls = NIL;
+ ListCell *l;
+ ListCell *r;
+
+ forboth(l, expr->largs, r, expr->rargs)
+ {
+ Node *le = (Node *) lfirst(l);
+ Node *re = (Node *) lfirst(r);
+ Oid coll;
+
+ coll = select_common_collation(context->pstate,
+ list_make2(le, re),
+ true);
+ colls = lappend_oid(colls, coll);
+ }
+ expr->inputcollids = colls;
+
+ /*
+ * Since the result is always boolean and therefore never
+ * has a collation, we can just stop here: this node has no
+ * impact on the collation of its parent.
+ */
+ return false; /* done */
+ }
+ case T_CoerceToDomain:
+ {
+ /*
+ * If the domain declaration included a non-default COLLATE
+ * spec, then use that collation as the output collation of
+ * the coercion. Otherwise allow the input collation to
+ * bubble up. (The input should be of the domain's base
+ * type, therefore we don't need to worry about it not being
+ * collatable when the domain is.)
+ */
+ CoerceToDomain *expr = (CoerceToDomain *) node;
+ Oid typcollation = get_typcollation(expr->resulttype);
+
+ /* ... but first, recurse */
+ (void) expression_tree_walker(node,
+ assign_collations_walker,
+ (void *) &loccontext);
+
+ if (OidIsValid(typcollation))
+ {
+ /* Node's result type is collatable. */
+ if (typcollation == DEFAULT_COLLATION_OID)
+ {
+ /* Collation state bubbles up from child. */
+ collation = loccontext.collation;
+ strength = loccontext.strength;
+ location = loccontext.location;
+ }
+ else
+ {
+ /* Use domain's collation as an implicit choice. */
+ collation = typcollation;
+ strength = COLLATE_IMPLICIT;
+ location = exprLocation(node);
+ }
+ }
+ else
+ {
+ /* Node's result type isn't collatable. */
+ collation = InvalidOid;
+ strength = COLLATE_NONE;
+ location = -1; /* won't be used */
+ }
+
+ /*
+ * Save the state into the expression node. We know it
+ * doesn't care about input collation.
+ */
+ if (strength == COLLATE_CONFLICT)
+ exprSetCollation(node, InvalidOid);
+ else
+ exprSetCollation(node, collation);
+ }
+ break;
+ case T_TargetEntry:
+ (void) expression_tree_walker(node,
+ assign_collations_walker,
+ (void *) &loccontext);
+
+ /*
+ * TargetEntry can have only one child, and should bubble that
+ * state up to its parent. We can't use the general-case code
+ * below because exprType and friends don't work on TargetEntry.
+ */
+ collation = loccontext.collation;
+ strength = loccontext.strength;
+ location = loccontext.location;
+ break;
+ case T_RangeTblRef:
+ case T_JoinExpr:
+ case T_FromExpr:
+ case T_SortGroupClause:
+ (void) expression_tree_walker(node,
+ assign_collations_walker,
+ (void *) &loccontext);
+ /*
+ * When we're invoked on a query's jointree, we don't need to do
+ * anything with join nodes except recurse through them to process
+ * WHERE/ON expressions. So just stop here. Likewise, we don't
+ * need to do anything when invoked on sort/group lists.
+ */
+ return false;
+ case T_Query:
+ {
+ /*
+ * We get here when we're invoked on the Query belonging to a
+ * SubLink. Act as though the Query returns its first output
+ * column, which indeed is what it does for EXPR_SUBLINK and
+ * ARRAY_SUBLINK cases. In the cases where the SubLink
+ * returns boolean, this info will be ignored.
+ *
+ * We needn't recurse, since the Query is already processed.
+ */
+ Query *qtree = (Query *) node;
+ TargetEntry *tent;
+
+ tent = (TargetEntry *) linitial(qtree->targetList);
+ Assert(IsA(tent, TargetEntry));
+ Assert(!tent->resjunk);
+ collation = exprCollation((Node *) tent->expr);
+ /* collation doesn't change if it's converted to array */
+ strength = COLLATE_IMPLICIT;
+ location = exprLocation((Node *) tent->expr);
+ }
+ break;
+ case T_List:
+ (void) expression_tree_walker(node,
+ assign_collations_walker,
+ (void *) &loccontext);
+
+ /*
+ * When processing a list, collation state just bubbles up from
+ * the list elements.
+ */
+ collation = loccontext.collation;
+ strength = loccontext.strength;
+ location = loccontext.location;
+ break;
+
+ case T_Var:
+ case T_Const:
+ case T_Param:
+ case T_CoerceToDomainValue:
+ case T_CaseTestExpr:
+ case T_SetToDefault:
+ case T_CurrentOfExpr:
+ /*
+ * General case for childless expression nodes. These should
+ * already have a collation assigned; it is not this function's
+ * responsibility to look into the catalogs for base-case
+ * information.
+ */
+ collation = exprCollation(node);
+
+ /*
+ * Note: in most cases, there will be an assigned collation
+ * whenever type_is_collatable(exprType(node)); but an exception
+ * occurs for a Var referencing a subquery output column for
+ * which a unique collation was not determinable. That may lead
+ * to a runtime failure if a collation-sensitive function is
+ * applied to the Var.
+ */
+
+ if (OidIsValid(collation))
+ strength = COLLATE_IMPLICIT;
+ else
+ strength = COLLATE_NONE;
+ location = exprLocation(node);
+ break;
+
+ default:
+ {
+ /*
+ * General case for most expression nodes with children.
+ * First recurse, then figure out what to assign here.
+ */
+ Oid typcollation;
+
+ (void) expression_tree_walker(node,
+ assign_collations_walker,
+ (void *) &loccontext);
+
+ typcollation = get_typcollation(exprType(node));
+ if (OidIsValid(typcollation))
+ {
+ /* Node's result is collatable; what about its input? */
+ if (loccontext.strength > COLLATE_NONE)
+ {
+ /* Collation state bubbles up from children. */
+ collation = loccontext.collation;
+ strength = loccontext.strength;
+ location = loccontext.location;
+ }
+ else
+ {
+ /*
+ * Collatable output produced without any collatable
+ * input. Use the type's collation (which is usually
+ * DEFAULT_COLLATION_OID, but might be different for a
+ * domain).
+ */
+ collation = typcollation;
+ strength = COLLATE_IMPLICIT;
+ location = exprLocation(node);
+ }
+ }
+ else
+ {
+ /* Node's result type isn't collatable. */
+ collation = InvalidOid;
+ strength = COLLATE_NONE;
+ location = -1; /* won't be used */
+ }
+
+ /*
+ * Save the result collation into the expression node.
+ * If the state is COLLATE_CONFLICT, we'll set the collation
+ * to InvalidOid, which might result in an error at runtime.
+ */
+ if (strength == COLLATE_CONFLICT)
+ exprSetCollation(node, InvalidOid);
+ else
+ exprSetCollation(node, collation);
+
+ /*
+ * Likewise save the input collation, which is the one that
+ * any function called by this node should use.
+ */
+ if (loccontext.strength == COLLATE_CONFLICT)
+ exprSetInputCollation(node, InvalidOid);
+ else
+ exprSetInputCollation(node, loccontext.collation);
+ }
+ break;
+ }
+
+ /*
+ * Now, merge my information into my parent's state. If the collation
+ * strength for this node is different from what's already in *context,
+ * then this node either dominates or is dominated by earlier siblings.
+ */
+ if (strength > context->strength)
+ {
+ /* Override previous parent state */
+ context->collation = collation;
+ context->strength = strength;
+ context->location = location;
+ /* Bubble up error info if applicable */
+ if (strength == COLLATE_CONFLICT)
+ {
+ context->collation2 = loccontext.collation2;
+ context->location2 = loccontext.location2;
+ }
+ }
+ else if (strength == context->strength)
+ {
+ /* Merge, or detect error if there's a collation conflict */
+ switch (strength)
+ {
+ case COLLATE_NONE:
+ /* Nothing + nothing is still nothing */
+ break;
+ case COLLATE_IMPLICIT:
+ if (collation != context->collation)
+ {
+ /*
+ * Non-default implicit collation always beats default.
+ */
+ if (context->collation == DEFAULT_COLLATION_OID)
+ {
+ /* Override previous parent state */
+ context->collation = collation;
+ context->strength = strength;
+ context->location = location;
+ }
+ else if (collation != DEFAULT_COLLATION_OID)
+ {
+ /*
+ * Ooops, we have a conflict. We cannot throw error
+ * here, since the conflict could be resolved by a
+ * later sibling CollateExpr, or the parent might not
+ * care about collation anyway. Return enough info to
+ * throw the error later, if needed.
+ */
+ context->strength = COLLATE_CONFLICT;
+ context->collation2 = collation;
+ context->location2 = location;
+ }
+ }
+ break;
+ case COLLATE_CONFLICT:
+ /* We're still conflicted ... */
+ break;
+ case COLLATE_EXPLICIT:
+ if (collation != context->collation)
+ {
+ /*
+ * Ooops, we have a conflict of explicit COLLATE clauses.
+ * Here we choose to throw error immediately; that is what
+ * the SQL standard says to do, and there's no good reason
+ * to be less strict.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_COLLATION_MISMATCH),
+ errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
+ get_collation_name(context->collation),
+ get_collation_name(collation)),
+ parser_errposition(context->pstate, location)));
+ }
+ break;
+ }
+ }
+
+ return false;
+}