diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-03-19 20:29:08 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-03-19 20:30:08 -0400 |
commit | b310b6e31ce5aa9e456c43c0e8e93248b0c84c02 (patch) | |
tree | e5168fcfdb231a9889e87e309f38a9e0f05a7896 /src/backend/parser/parse_collate.c | |
parent | 025f4c72f029242a6aaf3f14bb6d7da4ce070f72 (diff) | |
download | postgresql-b310b6e31ce5aa9e456c43c0e8e93248b0c84c02.tar.gz postgresql-b310b6e31ce5aa9e456c43c0e8e93248b0c84c02.zip |
Revise collation derivation method and expression-tree representation.
All expression nodes now have an explicit output-collation field, unless
they are known to only return a noncollatable data type (such as boolean
or record). Also, nodes that can invoke collation-aware functions store
a separate field that is the collation value to pass to the function.
This avoids confusion that arises when a function has collatable inputs
and noncollatable output type, or vice versa.
Also, replace the parser's on-the-fly collation assignment method with
a post-pass over the completed expression tree. This allows us to use
a more complex (and hopefully more nearly spec-compliant) assignment
rule without paying for it in extra storage in every expression node.
Fix assorted bugs in the planner's handling of collations by making
collation one of the defining properties of an EquivalenceClass and
by converting CollateExprs into discardable RelabelType nodes during
expression preprocessing.
Diffstat (limited to 'src/backend/parser/parse_collate.c')
-rw-r--r-- | src/backend/parser/parse_collate.c | 763 |
1 files changed, 763 insertions, 0 deletions
diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c new file mode 100644 index 00000000000..0b77e3ea2b7 --- /dev/null +++ b/src/backend/parser/parse_collate.c @@ -0,0 +1,763 @@ +/*------------------------------------------------------------------------- + * + * parse_collate.c + * Routines for assigning collation information. + * + * We choose to handle collation analysis in a post-pass over the output + * of expression parse analysis. This is because we need more state to + * perform this processing than is needed in the finished tree. If we + * did it on-the-fly while building the tree, all that state would have + * to be kept in expression node trees permanently. This way, the extra + * storage is just local variables in this recursive routine. + * + * The info that is actually saved in the finished tree is: + * 1. The output collation of each expression node, or InvalidOid if it + * returns a noncollatable data type. This can also be InvalidOid if the + * result type is collatable but the collation is indeterminate. + * 2. The collation to be used in executing each function. InvalidOid means + * that there are no collatable inputs or their collation is indeterminate. + * This value is only stored in node types that might call collation-using + * functions. + * + * You might think we could get away with storing only one collation per + * node, but the two concepts really need to be kept distinct. Otherwise + * it's too confusing when a function produces a collatable output type but + * has no collatable inputs or produces noncollatable output from collatable + * inputs. + * + * Cases with indeterminate collation might result in an error being thrown + * at runtime. If we knew exactly which functions require collation + * information, we could throw those errors at parse time instead. 
+ * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/parser/parse_collate.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_collate.h" +#include "utils/lsyscache.h" + + +/* + * Collation strength (the SQL standard calls this "derivation"). Order is + * chosen to allow comparisons to work usefully: assign_collations_walker + * compares strengths with > and ==, and a numerically larger strength + * overrides a smaller one. Note: the standard doesn't seem to distinguish + * between NONE and CONFLICT. + */ +typedef enum +{ + COLLATE_NONE, /* expression is of a noncollatable datatype */ + COLLATE_IMPLICIT, /* collation was derived implicitly */ + COLLATE_CONFLICT, /* we had a conflict of implicit collations */ + COLLATE_EXPLICIT /* collation was derived explicitly */ +} CollateStrength; + +typedef struct +{ + ParseState *pstate; /* parse state (for error reporting) */ + Oid collation; /* OID of current collation, if any (else InvalidOid) */ + CollateStrength strength; /* strength of current collation choice */ + int location; /* location of expr that set collation (-1 if none) */ + /* Remaining fields are only valid when strength == COLLATE_CONFLICT */ + Oid collation2; /* OID of conflicting collation */ + int location2; /* location of expr that set collation2 */ +} assign_collations_context; + +static bool assign_query_collations_walker(Node *node, ParseState *pstate); +static bool assign_collations_walker(Node *node, + assign_collations_context *context); + + +/* + * assign_query_collations() + * Mark all expressions in the given Query with collation information. + * + * This should be applied to each Query after completion of parse analysis + * for expressions. Note that we do not recurse into sub-Queries, since + * those should have been processed when built. 
+ */ +void +assign_query_collations(ParseState *pstate, Query *query) +{ + /* + * We just use query_tree_walker() to visit all the contained expressions. + * We can skip the rangetable and CTE subqueries, though, since RTEs and + * subqueries had better have been processed already (else Vars referring + * to them would not get created with the right collation). + * The walker's result is deliberately discarded. + */ + (void) query_tree_walker(query, + assign_query_collations_walker, + (void *) pstate, + QTW_IGNORE_RANGE_TABLE | + QTW_IGNORE_CTE_SUBQUERIES); +} + +/* + * Walker for assign_query_collations + * + * Each expression found by query_tree_walker is processed independently. + * Note that query_tree_walker may pass us a whole List, such as the + * targetlist, in which case each subexpression must be processed + * independently --- we don't want to bleat if two different targetentries + * have different collations. + * + * Every path returns false; the useful work is done by the calls to + * assign_list_collations() and assign_expr_collations(). + */ +static bool +assign_query_collations_walker(Node *node, ParseState *pstate) +{ + /* Need do nothing for empty subexpressions */ + if (node == NULL) + return false; + + /* + * We don't want to recurse into a set-operations tree; it's already + * been fully processed in transformSetOperationStmt. + */ + if (IsA(node, SetOperationStmt)) + return false; + + if (IsA(node, List)) /* list members are handled one by one */ + assign_list_collations(pstate, (List *) node); + else + assign_expr_collations(pstate, node); + + return false; +} + +/* + * assign_list_collations() + * Mark all nodes in the list of expressions with collation information. + * + * The list member expressions are processed independently; they do not have + * to share a common collation. Each member is handed separately to + * assign_expr_collations(), which starts from a freshly initialized context, + * so no collation state carries over from one member to the next. + */ +void +assign_list_collations(ParseState *pstate, List *exprs) +{ + ListCell *lc; + + foreach(lc, exprs) + { + Node *node = (Node *) lfirst(lc); + + assign_expr_collations(pstate, node); + } +} + +/* + * assign_expr_collations() + * Mark all nodes in the given expression tree with collation information. 
+ * + * This is exported for the benefit of various utility commands that process + * expressions without building a complete Query. It should be applied after + * calling transformExpr() plus any expression-modifying operations such as + * coerce_to_boolean(). + * + * The walk starts from an empty state (InvalidOid, COLLATE_NONE, location -1). + * The state accumulated for the top-level node is not examined afterwards; + * only the marking of the tree's nodes matters here. + */ +void +assign_expr_collations(ParseState *pstate, Node *expr) +{ + assign_collations_context context; + + /* initialize context for tree walk (collation2/location2 are left unset) */ + context.pstate = pstate; + context.collation = InvalidOid; + context.strength = COLLATE_NONE; + context.location = -1; + + /* and away we go */ + (void) assign_collations_walker(expr, &context); +} + +/* + * select_common_collation() + * Identify a common collation for a list of expressions. + * + * The expressions should all return the same datatype, else this is not + * terribly meaningful. + * + * none_ok means that it is permitted to return InvalidOid, indicating that + * no common collation could be identified, even for collatable datatypes. + * Otherwise, an error is thrown for conflict of implicit collations. + * + * The list is processed by assign_collations_walker(), so as a side effect + * the nodes within the given expressions get their collation fields assigned. + * + * In theory, none_ok = true reflects the rules of SQL standard clause "Result + * of data type combinations", none_ok = false reflects the rules of clause + * "Collation determination" (in some cases invoked via "Grouping + * operations"). 
+ */ +Oid +select_common_collation(ParseState *pstate, List *exprs, bool none_ok) +{ + assign_collations_context context; + + /* initialize context for tree walk (collation2/location2 get set only if a conflict is found) */ + context.pstate = pstate; + context.collation = InvalidOid; + context.strength = COLLATE_NONE; + context.location = -1; + + /* and away we go */ + (void) assign_collations_walker((Node *) exprs, &context); + + /* deal with collation conflict */ + if (context.strength == COLLATE_CONFLICT) + { + if (none_ok) + return InvalidOid; + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"", + get_collation_name(context.collation), + get_collation_name(context.collation2)), + errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."), + parser_errposition(context.pstate, context.location2))); + } + + /* + * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but + * that's okay because it must mean none of the expressions returned + * collatable datatypes. + */ + return context.collation; +} + +/* + * assign_collations_walker() + * Recursive guts of collation processing. + * + * Nodes with no children (eg, Vars, Consts, Params) must have been marked + * when built. All upper-level nodes are marked here. + * + * Note: if this is invoked directly on a List, it will attempt to infer a + * common collation for all the list members. In particular, it will throw + * error if there are conflicting explicit collations for different members. + * + * The return value is always false; the collation state determined for this + * node is reported by merging it into *context at the end of the function. + * RowExpr, RowCompareExpr, and the jointree and SortGroupClause node types + * return early without modifying *context. + */ +static bool +assign_collations_walker(Node *node, assign_collations_context *context) +{ + assign_collations_context loccontext; + Oid collation; + CollateStrength strength; + int location; + + /* Need do nothing for empty subexpressions */ + if (node == NULL) + return false; + + /* + * Prepare for recursion. For most node types, though not all, the + * first thing we do is recurse to process all nodes below this one. 
+ * Each level of the tree has its own local context. + */ + loccontext.pstate = context->pstate; + loccontext.collation = InvalidOid; + loccontext.strength = COLLATE_NONE; + loccontext.location = -1; + + /* + * Recurse if appropriate, then determine the collation for this node. + * + * Note: the general cases are at the bottom of the switch, after various + * special cases. + */ + switch (nodeTag(node)) + { + case T_CollateExpr: + { + /* + * COLLATE sets an explicitly derived collation, regardless of + * what the child state is. But we must recurse to set up + * collation info below here. + */ + CollateExpr *expr = (CollateExpr *) node; + + (void) expression_tree_walker(node, + assign_collations_walker, + (void *) &loccontext); + + collation = expr->collOid; + Assert(OidIsValid(collation)); + strength = COLLATE_EXPLICIT; + location = expr->location; + } + break; + case T_FieldSelect: + { + /* + * FieldSelect is a special case because the field may have + * a non-default collation, in which case we should use that. + * The field's collation was already looked up and saved + * in the node. + */ + FieldSelect *expr = (FieldSelect *) node; + + /* ... but first, recurse */ + (void) expression_tree_walker(node, + assign_collations_walker, + (void *) &loccontext); + + if (OidIsValid(expr->resultcollid)) + { + /* Node's result type is collatable. */ + if (expr->resultcollid == DEFAULT_COLLATION_OID) + { + /* + * The immediate input node necessarily yields a + * composite type, so it will have no exposed + * collation. However, if we are selecting a field + * from a function returning composite, see if we + * can bubble up a collation from the function's + * input. XXX this is a bit of a hack, rethink ... + */ + if (IsA(expr->arg, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr->arg; + + if (OidIsValid(fexpr->inputcollid)) + expr->resultcollid = fexpr->inputcollid; + } + } + /* Pass up field's collation as an implicit choice. 
+ */ + collation = expr->resultcollid; + strength = COLLATE_IMPLICIT; + location = exprLocation(node); + } + else + { + /* Node's result type isn't collatable. */ + collation = InvalidOid; + strength = COLLATE_NONE; + location = -1; /* won't be used */ + } + } + break; + case T_CaseExpr: + { + /* + * CaseExpr is a special case because we do not want to + * recurse into the test expression (if any). It was + * already marked with collations during transformCaseExpr, + * and furthermore its collation is not relevant to the + * result of the CASE --- only the output expressions are. + * So we can't use expression_tree_walker here. + */ + CaseExpr *expr = (CaseExpr *) node; + Oid typcollation; + ListCell *lc; + + foreach(lc, expr->args) + { + CaseWhen *when = (CaseWhen *) lfirst(lc); + + Assert(IsA(when, CaseWhen)); + /* + * The condition expressions mustn't affect the CASE's + * result collation either; but since they are known to + * yield boolean, it's safe to recurse directly on them + * --- they won't change loccontext. + */ + (void) assign_collations_walker((Node *) when->expr, + &loccontext); + (void) assign_collations_walker((Node *) when->result, + &loccontext); + } + (void) assign_collations_walker((Node *) expr->defresult, + &loccontext); + + /* + * Now determine the CASE's output collation. This is the + * same as the general case below. + */ + typcollation = get_typcollation(exprType(node)); + if (OidIsValid(typcollation)) + { + /* Node's result is collatable; what about its input? */ + if (loccontext.strength > COLLATE_NONE) + { + /* Collation state bubbles up from children. */ + collation = loccontext.collation; + strength = loccontext.strength; + location = loccontext.location; + } + else + { + /* + * Collatable output produced without any collatable + * input. Use the type's collation (which is usually + * DEFAULT_COLLATION_OID, but might be different for a + * domain). 
+ */ + collation = typcollation; + strength = COLLATE_IMPLICIT; + location = exprLocation(node); + } + } + else + { + /* Node's result type isn't collatable. */ + collation = InvalidOid; + strength = COLLATE_NONE; + location = -1; /* won't be used */ + } + + /* + * Save the state into the expression node. We know it + * doesn't care about input collation. + */ + if (strength == COLLATE_CONFLICT) + exprSetCollation(node, InvalidOid); + else + exprSetCollation(node, collation); + } + break; + case T_RowExpr: + { + /* + * RowExpr is a special case because the subexpressions + * are independent: we don't want to complain if some of + * them have incompatible explicit collations. + */ + RowExpr *expr = (RowExpr *) node; + + assign_list_collations(context->pstate, expr->args); + + /* + * Since the result is always composite and therefore never + * has a collation, we can just stop here: this node has no + * impact on the collation of its parent. + */ + return false; /* done */ + } + case T_RowCompareExpr: + { + /* + * For RowCompare, we have to find the common collation of + * each pair of input columns and build a list. If we can't + * find a common collation, we just put InvalidOid into the + * list, which may or may not cause an error at runtime. + */ + RowCompareExpr *expr = (RowCompareExpr *) node; + List *colls = NIL; + ListCell *l; + ListCell *r; + + forboth(l, expr->largs, r, expr->rargs) + { + Node *le = (Node *) lfirst(l); + Node *re = (Node *) lfirst(r); + Oid coll; + + coll = select_common_collation(context->pstate, + list_make2(le, re), + true); + colls = lappend_oid(colls, coll); + } + expr->inputcollids = colls; + + /* + * Since the result is always boolean and therefore never + * has a collation, we can just stop here: this node has no + * impact on the collation of its parent. 
+ */ + return false; /* done */ + } + case T_CoerceToDomain: + { + /* + * If the domain declaration included a non-default COLLATE + * spec, then use that collation as the output collation of + * the coercion. Otherwise allow the input collation to + * bubble up. (The input should be of the domain's base + * type, therefore we don't need to worry about it not being + * collatable when the domain is.) + */ + CoerceToDomain *expr = (CoerceToDomain *) node; + Oid typcollation = get_typcollation(expr->resulttype); + + /* ... but first, recurse */ + (void) expression_tree_walker(node, + assign_collations_walker, + (void *) &loccontext); + + if (OidIsValid(typcollation)) + { + /* Node's result type is collatable. */ + if (typcollation == DEFAULT_COLLATION_OID) + { + /* Collation state bubbles up from child. */ + collation = loccontext.collation; + strength = loccontext.strength; + location = loccontext.location; + } + else + { + /* Use domain's collation as an implicit choice. */ + collation = typcollation; + strength = COLLATE_IMPLICIT; + location = exprLocation(node); + } + } + else + { + /* Node's result type isn't collatable. */ + collation = InvalidOid; + strength = COLLATE_NONE; + location = -1; /* won't be used */ + } + + /* + * Save the state into the expression node. We know it + * doesn't care about input collation. + */ + if (strength == COLLATE_CONFLICT) + exprSetCollation(node, InvalidOid); + else + exprSetCollation(node, collation); + } + break; + case T_TargetEntry: + (void) expression_tree_walker(node, + assign_collations_walker, + (void *) &loccontext); + + /* + * TargetEntry can have only one child, and should bubble that + * state up to its parent. We can't use the general-case code + * below because exprType and friends don't work on TargetEntry. 
+ */ + collation = loccontext.collation; + strength = loccontext.strength; + location = loccontext.location; + break; + case T_RangeTblRef: + case T_JoinExpr: + case T_FromExpr: + case T_SortGroupClause: + (void) expression_tree_walker(node, + assign_collations_walker, + (void *) &loccontext); + /* + * When we're invoked on a query's jointree, we don't need to do + * anything with join nodes except recurse through them to process + * WHERE/ON expressions. So just stop here. Likewise, we don't + * need to do anything when invoked on sort/group lists. + */ + return false; + case T_Query: + { + /* + * We get here when we're invoked on the Query belonging to a + * SubLink. Act as though the Query returns its first output + * column, which indeed is what it does for EXPR_SUBLINK and + * ARRAY_SUBLINK cases. In the cases where the SubLink + * returns boolean, this info will be ignored. + * + * We needn't recurse, since the Query is already processed. + * + * NOTE(review): linitial() is applied to targetList without + * an emptiness check, so this relies on the sub-Query having + * at least one output column --- confirm. + */ + Query *qtree = (Query *) node; + TargetEntry *tent; + + tent = (TargetEntry *) linitial(qtree->targetList); + Assert(IsA(tent, TargetEntry)); + Assert(!tent->resjunk); + collation = exprCollation((Node *) tent->expr); + /* collation doesn't change if it's converted to array */ + strength = COLLATE_IMPLICIT; + location = exprLocation((Node *) tent->expr); + } + break; + case T_List: + (void) expression_tree_walker(node, + assign_collations_walker, + (void *) &loccontext); + + /* + * When processing a list, collation state just bubbles up from + * the list elements. + */ + collation = loccontext.collation; + strength = loccontext.strength; + location = loccontext.location; + break; + + case T_Var: + case T_Const: + case T_Param: + case T_CoerceToDomainValue: + case T_CaseTestExpr: + case T_SetToDefault: + case T_CurrentOfExpr: + /* + * General case for childless expression nodes. 
These should + * already have a collation assigned; it is not this function's + * responsibility to look into the catalogs for base-case + * information. + */ + collation = exprCollation(node); + + /* + * Note: in most cases, there will be an assigned collation + * whenever type_is_collatable(exprType(node)); but an exception + * occurs for a Var referencing a subquery output column for + * which a unique collation was not determinable. That may lead + * to a runtime failure if a collation-sensitive function is + * applied to the Var. + */ + + if (OidIsValid(collation)) + strength = COLLATE_IMPLICIT; + else + strength = COLLATE_NONE; + location = exprLocation(node); + break; + + default: + { + /* + * General case for most expression nodes with children. + * First recurse, then figure out what to assign here. + */ + Oid typcollation; + + (void) expression_tree_walker(node, + assign_collations_walker, + (void *) &loccontext); + + typcollation = get_typcollation(exprType(node)); + if (OidIsValid(typcollation)) + { + /* Node's result is collatable; what about its input? */ + if (loccontext.strength > COLLATE_NONE) + { + /* Collation state bubbles up from children. */ + collation = loccontext.collation; + strength = loccontext.strength; + location = loccontext.location; + } + else + { + /* + * Collatable output produced without any collatable + * input. Use the type's collation (which is usually + * DEFAULT_COLLATION_OID, but might be different for a + * domain). + */ + collation = typcollation; + strength = COLLATE_IMPLICIT; + location = exprLocation(node); + } + } + else + { + /* Node's result type isn't collatable. */ + collation = InvalidOid; + strength = COLLATE_NONE; + location = -1; /* won't be used */ + } + + /* + * Save the result collation into the expression node. + * If the state is COLLATE_CONFLICT, we'll set the collation + * to InvalidOid, which might result in an error at runtime. 
+ */ + if (strength == COLLATE_CONFLICT) + exprSetCollation(node, InvalidOid); + else + exprSetCollation(node, collation); + + /* + * Likewise save the input collation, which is the one that + * any function called by this node should use. + */ + if (loccontext.strength == COLLATE_CONFLICT) + exprSetInputCollation(node, InvalidOid); + else + exprSetInputCollation(node, loccontext.collation); + } + break; + } + + /* + * Now, merge my information into my parent's state. If the collation + * strength for this node is different from what's already in *context, + * then this node either dominates or is dominated by earlier siblings. + */ + if (strength > context->strength) + { + /* Override previous parent state */ + context->collation = collation; + context->strength = strength; + context->location = location; + /* Bubble up error info if applicable */ + if (strength == COLLATE_CONFLICT) + { + context->collation2 = loccontext.collation2; + context->location2 = loccontext.location2; + } + } + else if (strength == context->strength) + { + /* Merge, or detect error if there's a collation conflict */ + switch (strength) + { + case COLLATE_NONE: + /* Nothing + nothing is still nothing */ + break; + case COLLATE_IMPLICIT: + if (collation != context->collation) + { + /* + * Non-default implicit collation always beats default. + * (If it is the new collation that is the default, neither + * branch below fires and the parent's choice is kept.) + */ + if (context->collation == DEFAULT_COLLATION_OID) + { + /* Override previous parent state */ + context->collation = collation; + context->strength = strength; + context->location = location; + } + else if (collation != DEFAULT_COLLATION_OID) + { + /* + * Ooops, we have a conflict. We cannot throw error + * here, since the conflict could be resolved by a + * later sibling CollateExpr, or the parent might not + * care about collation anyway. Return enough info to + * throw the error later, if needed. 
+ */ + context->strength = COLLATE_CONFLICT; + context->collation2 = collation; + context->location2 = location; + } + } + break; + case COLLATE_CONFLICT: + /* We're still conflicted ... */ + break; + case COLLATE_EXPLICIT: + if (collation != context->collation) + { + /* + * Ooops, we have a conflict of explicit COLLATE clauses. + * Here we choose to throw error immediately; that is what + * the SQL standard says to do, and there's no good reason + * to be less strict. + */ + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"", + get_collation_name(context->collation), + get_collation_name(collation)), + parser_errposition(context->pstate, location))); + } + break; + } + } + + return false; +} |