diff options
author | Peter Eisentraut <peter_e@gmx.net> | 2006-12-21 16:05:16 +0000 |
---|---|---|
committer | Peter Eisentraut <peter_e@gmx.net> | 2006-12-21 16:05:16 +0000 |
commit | 8c1de5fb0010ae712568f1706b737270c3609bd8 (patch) | |
tree | bc328a654c41ea3eb1a9a27b76fd5215fb698608 /src/backend | |
parent | ed1e9cd501b4dc89a6a7e5cef702f2f6830ae829 (diff) | |
download | postgresql-8c1de5fb0010ae712568f1706b737270c3609bd8.tar.gz postgresql-8c1de5fb0010ae712568f1706b737270c3609bd8.zip |
Initial SQL/XML support: xml data type and initial set of functions.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/executor/execQual.c | 177 | ||||
-rw-r--r-- | src/backend/nodes/copyfuncs.c | 21 | ||||
-rw-r--r-- | src/backend/nodes/equalfuncs.c | 16 | ||||
-rw-r--r-- | src/backend/nodes/outfuncs.c | 16 | ||||
-rw-r--r-- | src/backend/nodes/readfuncs.c | 20 | ||||
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 27 | ||||
-rw-r--r-- | src/backend/parser/gram.y | 198 | ||||
-rw-r--r-- | src/backend/parser/keywords.c | 19 | ||||
-rw-r--r-- | src/backend/parser/parse_coerce.c | 42 | ||||
-rw-r--r-- | src/backend/parser/parse_expr.c | 61 | ||||
-rw-r--r-- | src/backend/parser/parse_target.c | 17 | ||||
-rw-r--r-- | src/backend/utils/adt/Makefile | 4 | ||||
-rw-r--r-- | src/backend/utils/adt/ruleutils.c | 27 | ||||
-rw-r--r-- | src/backend/utils/adt/xml.c | 942 | ||||
-rw-r--r-- | src/backend/utils/mb/mbutils.c | 4 |
15 files changed, 1566 insertions, 25 deletions
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 1dbef5f15c9..10b02b4a3ec 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.199 2006/11/17 16:46:27 petere Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.200 2006/12/21 16:05:13 petere Exp $ * *------------------------------------------------------------------------- */ @@ -52,6 +52,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/typcache.h" +#include "utils/xml.h" /* static function decls */ @@ -119,6 +120,8 @@ static Datum ExecEvalMinMax(MinMaxExprState *minmaxExpr, static Datum ExecEvalNullIf(FuncExprState *nullIfExpr, ExprContext *econtext, bool *isNull, ExprDoneCond *isDone); +static Datum ExecEvalXml(XmlExprState *xmlExpr, ExprContext *econtext, + bool *isNull, ExprDoneCond *isDone); static Datum ExecEvalNullTest(NullTestState *nstate, ExprContext *econtext, bool *isNull, ExprDoneCond *isDone); @@ -2878,6 +2881,120 @@ ExecEvalBooleanTest(GenericExprState *bstate, } } +/* ---------------------------------------------------------------- + * ExecEvalXml + * ---------------------------------------------------------------- + */ + +static Datum +ExecEvalXml(XmlExprState *xmlExpr, ExprContext *econtext, + bool *isNull, ExprDoneCond *isDone) +{ + StringInfoData buf; + bool isnull; + ListCell *arg; + text *result = NULL; + int len; + + initStringInfo(&buf); + + *isNull = false; + + if (isDone) + *isDone = ExprSingleResult; + + switch (xmlExpr->op) + { + case IS_XMLCONCAT: + *isNull = true; + + foreach(arg, xmlExpr->args) + { + ExprState *e = (ExprState *) lfirst(arg); + Datum value = ExecEvalExpr(e, econtext, &isnull, NULL); + + if (!isnull) + { + appendStringInfoString(&buf, DatumGetCString(OidFunctionCall1(xmlExpr->arg_typeout, value))); + *isNull = false; + } + } + break; + + case IS_XMLELEMENT: + { + int state = 0, i = 0; + appendStringInfo(&buf, "<%s", xmlExpr->name); + foreach(arg, xmlExpr->named_args) + { + GenericExprState *gstate = (GenericExprState *) lfirst(arg); + Datum value = ExecEvalExpr(gstate->arg, econtext, &isnull, NULL); + if (!isnull) + { + char *outstr = DatumGetCString(OidFunctionCall1(xmlExpr->named_args_tcache[i], value)); + appendStringInfo(&buf, " %s=\"%s\"", xmlExpr->named_args_ncache[i], outstr); + pfree(outstr); + } + i++; + } + if (xmlExpr->args) + { + ExprState *expr = linitial(xmlExpr->args); + Datum value = ExecEvalExpr(expr, econtext, &isnull, NULL); + + if (!isnull) + { + char *outstr = DatumGetCString(OidFunctionCall1(xmlExpr->arg_typeout, value)); + if (state == 0) + { + appendStringInfoChar(&buf, '>'); + state = 1; + } + appendStringInfo(&buf, "%s", outstr); + pfree(outstr); + } + } + + if (state == 0) + appendStringInfo(&buf, "/>"); + else if (state == 1) + appendStringInfo(&buf, "</%s>", xmlExpr->name); + + } + break; + + case IS_XMLFOREST: + { + /* only if all argumets are null returns null */ + int i = 0; + *isNull = true; + foreach(arg, xmlExpr->named_args) + { + GenericExprState *gstate = (GenericExprState *) lfirst(arg); + Datum value = ExecEvalExpr(gstate->arg, econtext, &isnull, NULL); + if (!isnull) + { + char *outstr = DatumGetCString(OidFunctionCall1(xmlExpr->named_args_tcache[i], value)); + appendStringInfo(&buf, "<%s>%s</%s>", xmlExpr->named_args_ncache[i], outstr, xmlExpr->named_args_ncache[i]); + pfree(outstr); + *isNull = false; + } + i += 1; + } + } + break; + default: + break; + } + + len = buf.len + VARHDRSZ; + result = palloc(len); + VARATT_SIZEP(result) = len; + memcpy(VARDATA(result), buf.data, buf.len); + pfree(buf.data); + PG_RETURN_TEXT_P(result); +} + /* * ExecEvalCoerceToDomain * @@ -3668,6 +3785,64 @@ ExecInitExpr(Expr *node, PlanState *parent) state = (ExprState *) mstate; } break; + case T_XmlExpr: + { + List *outlist; + ListCell *arg; + XmlExpr *xexpr = (XmlExpr *) node; + XmlExprState *xstate = makeNode(XmlExprState); + int i = 0; + Oid typeout; + + xstate->name = xexpr->name; + + xstate->xprstate.evalfunc = (ExprStateEvalFunc) ExecEvalXml; + xstate->op = xexpr->op; + + outlist = NIL; + if (xexpr->named_args) + { + xstate->named_args_tcache = (Oid *) palloc(list_length(xexpr->named_args) * sizeof(int)); + xstate->named_args_ncache = (char **) palloc(list_length(xexpr->named_args) * sizeof(char *)); + + i = 0; + foreach(arg, xexpr->named_args) + { + bool tpisvarlena; + Expr *e = (Expr *) lfirst(arg); + ExprState *estate = ExecInitExpr(e, parent); + TargetEntry *tle; + outlist = lappend(outlist, estate); + tle = (TargetEntry *) ((GenericExprState *) estate)->xprstate.expr; + getTypeOutputInfo(exprType((Node *)tle->expr), &typeout, &tpisvarlena); + xstate->named_args_ncache[i] = tle->resname; + xstate->named_args_tcache[i] = typeout; + i++; + } + } + else + { + xstate->named_args_tcache = NULL; + xstate->named_args_ncache = NULL; + } + xstate->named_args = outlist; + + outlist = NIL; + foreach(arg, xexpr->args) + { + bool tpisvarlena; + ExprState *estate; + Expr *e = (Expr *) lfirst(arg); + getTypeOutputInfo(exprType((Node *)e), &typeout, &tpisvarlena); + estate = ExecInitExpr(e, parent); + outlist = lappend(outlist, estate); + } + xstate->arg_typeout = typeout; + xstate->args = outlist; + + state = (ExprState *) xstate; + } + break; case T_NullIfExpr: { NullIfExpr *nullifexpr = (NullIfExpr *) node; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 5047dc8ad7b..3bb95b658d1 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.354 2006/12/10 22:13:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.355 2006/12/21 16:05:13 petere Exp $ * *------------------------------------------------------------------------- */ @@ -1137,6 +1137,22 @@ _copyBooleanTest(BooleanTest *from) } /* + * _copyXmlExpr + */ +static XmlExpr * +_copyXmlExpr(XmlExpr *from) +{ + XmlExpr *newnode = makeNode(XmlExpr); + + COPY_SCALAR_FIELD(op); + COPY_STRING_FIELD(name); + COPY_NODE_FIELD(named_args); + COPY_NODE_FIELD(args); + + return newnode; +} + +/* * _copyCoerceToDomain */ static CoerceToDomain * @@ -2966,6 +2982,9 @@ copyObject(void *from) case T_BooleanTest: retval = _copyBooleanTest(from); break; + case T_XmlExpr: + retval = _copyXmlExpr(from); + break; case T_CoerceToDomain: retval = _copyCoerceToDomain(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index e341b74f3e6..ef21e67fafb 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.288 2006/12/10 22:13:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.289 2006/12/21 16:05:13 petere Exp $ * *------------------------------------------------------------------------- */ @@ -496,6 +496,17 @@ _equalBooleanTest(BooleanTest *a, BooleanTest *b) } static bool +_equalXmlExpr(XmlExpr *a, XmlExpr *b) +{ + COMPARE_SCALAR_FIELD(op); + COMPARE_STRING_FIELD(name); + COMPARE_NODE_FIELD(named_args); + COMPARE_NODE_FIELD(args); + + return true; +} + +static bool _equalCoerceToDomain(CoerceToDomain *a, CoerceToDomain *b) { COMPARE_NODE_FIELD(arg); @@ -1968,6 +1979,9 @@ equal(void *a, void *b) case T_BooleanTest: retval = _equalBooleanTest(a, b); break; + case T_XmlExpr: + retval = _equalXmlExpr(a, b); + break; case T_CoerceToDomain: retval = _equalCoerceToDomain(a, b); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index daeb3fe872d..5ddf60dbbb1 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.286 2006/12/10 22:13:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.287 2006/12/21 16:05:13 petere Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -921,6 +921,17 @@ _outBooleanTest(StringInfo str, BooleanTest *node) } static void +_outXmlExpr(StringInfo str, XmlExpr *node) +{ + WRITE_NODE_TYPE("XMLEXPR"); + + WRITE_ENUM_FIELD(op, XmlExprOp); + WRITE_STRING_FIELD(name); + WRITE_NODE_FIELD(named_args); + WRITE_NODE_FIELD(args); +} + +static void _outCoerceToDomain(StringInfo str, CoerceToDomain *node) { WRITE_NODE_TYPE("COERCETODOMAIN"); @@ -2019,6 +2030,9 @@ _outNode(StringInfo str, void *obj) case T_BooleanTest: _outBooleanTest(str, obj); break; + case T_XmlExpr: + _outXmlExpr(str, obj); + break; case T_CoerceToDomain: _outCoerceToDomain(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index df0a2170272..689cef3edf2 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.196 2006/12/10 22:13:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.197 2006/12/21 16:05:13 petere Exp $ * * NOTES * Path and Plan nodes do not have any readfuncs support, because we @@ -765,6 +765,22 @@ _readBooleanTest(void) } /* + * _readXmlExpr + */ +static XmlExpr * +_readXmlExpr(void) +{ + READ_LOCALS(XmlExpr); + + READ_ENUM_FIELD(op, XmlExprOp); + READ_STRING_FIELD(name); + READ_NODE_FIELD(named_args); + READ_NODE_FIELD(args); + + READ_DONE(); +} + +/* * _readCoerceToDomain */ static CoerceToDomain * @@ -1014,6 +1030,8 @@ parseNodeString(void) return_value = _readNullTest(); else if (MATCH("BOOLEANTEST", 11)) return_value = _readBooleanTest(); + else if (MATCH("XMLEXPR", 7)) + return_value = _readXmlExpr(); else if (MATCH("COERCETODOMAIN", 14)) return_value = _readCoerceToDomain(); else if (MATCH("COERCETODOMAINVALUE", 19)) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 3800228398e..73ad926418f 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.223 2006/10/25 22:11:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.224 2006/12/21 16:05:13 petere Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -559,6 +559,8 @@ expression_returns_set_walker(Node *node, void *context) return false; if (IsA(node, NullIfExpr)) return false; + if (IsA(node, XmlExpr)) + return false; return expression_tree_walker(node, expression_returns_set_walker, context); @@ -876,6 +878,8 @@ contain_nonstrict_functions_walker(Node *node, void *context) return true; if (IsA(node, BooleanTest)) return true; + if (IsA(node, XmlExpr)) + return true; return expression_tree_walker(node, contain_nonstrict_functions_walker, context); } @@ -3334,6 +3338,16 @@ expression_tree_walker(Node *node, return walker(((NullTest *) node)->arg, context); case T_BooleanTest: return walker(((BooleanTest *) node)->arg, context); + case T_XmlExpr: + { + XmlExpr *xexpr = (XmlExpr *) node; + + if (walker(xexpr->named_args, context)) + return true; + if (walker(xexpr->args, context)) + return true; + } + break; case T_CoerceToDomain: return walker(((CoerceToDomain *) node)->arg, context); case T_TargetEntry: @@ -3857,6 +3871,17 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_XmlExpr: + { + XmlExpr *xexpr = (XmlExpr *) node; + XmlExpr *newnode; + + FLATCOPY(newnode, xexpr, XmlExpr); + MUTATE(newnode->named_args, xexpr->named_args, List *); + MUTATE(newnode->args, xexpr->args, List *); + return (Node *) newnode; + } + break; case T_NullIfExpr: { NullIfExpr *expr = (NullIfExpr *) node; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c90743a1017..cc400407363 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.568 2006/11/05 22:42:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.569 2006/12/21 16:05:14 petere Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -106,6 +106,7 @@ static void insertSelectOptions(SelectStmt *stmt, static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg); static Node *doNegate(Node *n, int location); static void doNegateFloat(Value *v); +static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args); %} @@ -345,6 +346,11 @@ static void doNegateFloat(Value *v); %type <str> OptTableSpace OptConsTableSpace OptTableSpaceOwner %type <list> opt_check_option +%type <target> xml_attribute_el +%type <list> xml_attribute_list xml_attributes +%type <node> xml_root_version +%type <ival> opt_xml_root_standalone document_or_content xml_whitespace_option + /* * If you make any token changes, update the keyword table in @@ -365,13 +371,13 @@ static void doNegateFloat(Value *v); CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT COMMITTED CONCURRENTLY CONNECTION CONSTRAINT CONSTRAINTS - CONVERSION_P CONVERT COPY CREATE CREATEDB + CONTENT CONVERSION_P CONVERT COPY CREATE CREATEDB CREATEROLE CREATEUSER CROSS CSV CURRENT_DATE CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS - DESC DISABLE_P DISTINCT DO DOMAIN_P DOUBLE_P DROP + DESC DISABLE_P DISTINCT DO DOCUMENT DOMAIN_P DOUBLE_P DROP EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ESCAPE EXCEPT EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT @@ -398,7 +404,7 @@ static void doNegateFloat(Value *v); MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE - NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB + NAME NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB NOCREATEROLE NOCREATEUSER NOINHERIT NOLOGIN_P NONE NOSUPERUSER NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF NUMERIC @@ -417,8 +423,8 @@ static void doNegateFloat(Value *v); SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE - SHOW SIMILAR SIMPLE SMALLINT SOME STABLE START STATEMENT - STATISTICS STDIN STDOUT STORAGE STRICT_P SUBSTRING SUPERUSER_P SYMMETRIC + SHOW SIMILAR SIMPLE SMALLINT SOME STABLE STANDALONE START STATEMENT + STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP SUBSTRING SUPERUSER_P SYMMETRIC SYSID SYSTEM_P TABLE TABLESPACE TEMP TEMPLATE TEMPORARY THEN TIME TIMESTAMP @@ -428,12 +434,15 @@ static void doNegateFloat(Value *v); UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNTIL UPDATE USER USING - VACUUM VALID VALIDATOR VALUES VARCHAR VARYING - VERBOSE VIEW VOLATILE + VACUUM VALID VALIDATOR VALUE VALUES VARCHAR VARYING + VERBOSE VERSION VIEW VOLATILE + + WHEN WHERE WHITESPACE WITH WITHOUT WORK WRITE - WHEN WHERE WITH WITHOUT WORK WRITE + XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLFOREST XMLPARSE + XMLPI XMLROOT XMLSERIALIZE - YEAR_P + YEAR_P YES ZONE @@ -484,6 +493,7 @@ static void doNegateFloat(Value *v); * left-associativity among the JOIN rules themselves. */ %left JOIN CROSS LEFT FULL RIGHT INNER_P NATURAL +%right PRESERVE STRIP %% /* @@ -7868,6 +7878,146 @@ func_expr: func_name '(' ')' v->op = IS_LEAST; $$ = (Node *)v; } + | XMLCONCAT '(' expr_list ')' + { + $$ = makeXmlExpr(IS_XMLCONCAT, NULL, NULL, $3); + } + | XMLELEMENT '(' NAME ColLabel ')' + { + $$ = makeXmlExpr(IS_XMLELEMENT, $4, NULL, NULL); + } + | XMLELEMENT '(' NAME ColLabel ',' xml_attributes ')' + { + $$ = makeXmlExpr(IS_XMLELEMENT, $4, $6, NULL); + } + | XMLELEMENT '(' NAME ColLabel ',' expr_list ')' + { + $$ = makeXmlExpr(IS_XMLELEMENT, $4, NULL, $6); + } + | XMLELEMENT '(' NAME ColLabel ',' xml_attributes ',' expr_list ')' + { + $$ = makeXmlExpr(IS_XMLELEMENT, $4, $6, $8); + } + | XMLFOREST '(' xml_attribute_list ')' + { + $$ = makeXmlExpr(IS_XMLFOREST, NULL, $3, NULL); + } + | XMLPARSE '(' document_or_content a_expr xml_whitespace_option ')' + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("xmlparse"); + n->args = list_make3(makeBoolAConst($3 == DOCUMENT), $4, makeBoolAConst($5 == PRESERVE)); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + n->location = @1; + $$ = (Node *)n; + } + | XMLPI '(' NAME ColLabel ')' + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("xmlpi"); + n->args = list_make1(makeStringConst($4, NULL)); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + n->location = @1; + $$ = (Node *)n; + } + | XMLPI '(' NAME ColLabel ',' a_expr ')' + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("xmlpi"); + n->args = list_make2(makeStringConst($4, NULL), $6); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + n->location = @1; + $$ = (Node *)n; + } + | XMLROOT '(' a_expr ',' xml_root_version opt_xml_root_standalone ')' + { + FuncCall *n = makeNode(FuncCall); + Node *ver; + A_Const *sa; + + if ($5) + ver = $5; + else + { + A_Const *val; + + val = makeNode(A_Const); + val->val.type = T_Null; + ver = (Node *) val; + } + + if ($6) + sa = makeBoolAConst($6 == 1); + else + { + sa = makeNode(A_Const); + sa->val.type = T_Null; + } + + n->funcname = SystemFuncName("xmlroot"); + n->args = list_make3($3, ver, sa); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + n->location = @1; + $$ = (Node *)n; + } + | XMLSERIALIZE '(' document_or_content a_expr AS Typename ')' + { + /* + * FIXME: This should be made distinguishable from + * CAST (for reverse compilation at least). + */ + $$ = makeTypeCast($4, $6); + } + ; + +/* + * SQL/XML support + */ +xml_root_version: VERSION a_expr { $$ = $2; } + | VERSION NO VALUE { $$ = NULL; } + ; + +opt_xml_root_standalone: ',' STANDALONE YES { $$ = 1; } + | ',' STANDALONE NO { $$ = -1; } + | ',' STANDALONE NO VALUE { $$ = 0; } + | /*EMPTY*/ { $$ = 0; } + ; + +xml_attributes: XMLATTRIBUTES '(' xml_attribute_list ')' { $$ = $3; } + ; + +xml_attribute_list: xml_attribute_el { $$ = list_make1($1); } + | xml_attribute_list ',' xml_attribute_el { $$ = lappend($1, $3); } + ; + +xml_attribute_el: a_expr AS ColLabel + { + $$ = makeNode(ResTarget); + $$->name = $3; + $$->indirection = NULL; + $$->val = (Node *) $1; + + } + | a_expr + { + $$ = makeNode(ResTarget); + $$->name = NULL; + $$->indirection = NULL; + $$->val = (Node *) $1; + } + ; + +document_or_content: DOCUMENT { $$ = DOCUMENT; } + | CONTENT { $$ = CONTENT; } + ; + +xml_whitespace_option: PRESERVE WHITESPACE { $$ = PRESERVE; } + | STRIP WHITESPACE { $$ = STRIP; } + | /*EMPTY*/ { $$ = STRIP; } ; /* @@ -8562,6 +8712,7 @@ unreserved_keyword: | CONCURRENTLY | CONNECTION | CONSTRAINTS + | CONTENT | CONVERSION_P | COPY | CREATEDB @@ -8581,6 +8732,7 @@ unreserved_keyword: | DELIMITER | DELIMITERS | DISABLE_P + | DOCUMENT | DOMAIN_P | DOUBLE_P | DROP @@ -8640,6 +8792,7 @@ unreserved_keyword: | MODE | MONTH_P | MOVE + | NAME | NAMES | NEXT | NO @@ -8700,12 +8853,14 @@ unreserved_keyword: | SHOW | SIMPLE | STABLE + | STANDALONE | START | STATEMENT | STATISTICS | STDIN | STDOUT | STORAGE + | STRIP | SUPERUSER_P | SYSID | SYSTEM_P @@ -8729,13 +8884,17 @@ unreserved_keyword: | VALID | VALIDATOR | VARYING + | VERSION | VIEW + | VALUE | VOLATILE + | WHITESPACE | WITH | WITHOUT | WORK | WRITE | YEAR_P + | YES | ZONE ; @@ -8788,6 +8947,14 @@ col_name_keyword: | TRIM | VALUES | VARCHAR + | XMLATTRIBUTES + | XMLELEMENT + | XMLCONCAT + | XMLFOREST + | XMLPARSE + | XMLPI + | XMLROOT + | XMLSERIALIZE ; /* Function identifier --- keywords that can be function names. @@ -9322,6 +9489,17 @@ doNegateFloat(Value *v) } } +static Node * +makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args) +{ + XmlExpr *x = makeNode(XmlExpr); + x->op = op; + x->name = name; + x->named_args = named_args; + x->args = args; + return (Node *) x; +} + /* * Must undefine base_yylex before including scan.c, since we want it * to create the function base_yylex not filtered_base_yylex. diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index 50fd3aac405..b5e49e955fc 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.177 2006/10/07 21:51:02 petere Exp $ + * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.178 2006/12/21 16:05:14 petere Exp $ * *------------------------------------------------------------------------- */ @@ -89,6 +89,7 @@ static const ScanKeyword ScanKeywords[] = { {"connection", CONNECTION}, {"constraint", CONSTRAINT}, {"constraints", CONSTRAINTS}, + {"content", CONTENT}, {"conversion", CONVERSION_P}, {"convert", CONVERT}, {"copy", COPY}, @@ -123,6 +124,7 @@ static const ScanKeyword ScanKeywords[] = { {"disable", DISABLE_P}, {"distinct", DISTINCT}, {"do", DO}, + {"document", DOCUMENT}, {"domain", DOMAIN_P}, {"double", DOUBLE_P}, {"drop", DROP}, @@ -218,6 +220,7 @@ static const ScanKeyword ScanKeywords[] = { {"mode", MODE}, {"month", MONTH_P}, {"move", MOVE}, + {"name", NAME}, {"names", NAMES}, {"national", NATIONAL}, {"natural", NATURAL}, @@ -314,6 +317,7 @@ static const ScanKeyword ScanKeywords[] = { {"smallint", SMALLINT}, {"some", SOME}, {"stable", STABLE}, + {"standalone", STANDALONE}, {"start", START}, {"statement", STATEMENT}, {"statistics", STATISTICS}, @@ -321,6 +325,7 @@ static const ScanKeyword ScanKeywords[] = { {"stdout", STDOUT}, {"storage", STORAGE}, {"strict", STRICT_P}, + {"strip", STRIP}, {"substring", SUBSTRING}, {"superuser", SUPERUSER_P}, {"symmetric", SYMMETRIC}, @@ -357,19 +362,31 @@ static const ScanKeyword ScanKeywords[] = { {"vacuum", VACUUM}, {"valid", VALID}, {"validator", VALIDATOR}, + {"value", VALUE}, {"values", VALUES}, {"varchar", VARCHAR}, {"varying", VARYING}, {"verbose", VERBOSE}, + {"version", VERSION}, {"view", VIEW}, {"volatile", VOLATILE}, {"when", WHEN}, {"where", WHERE}, + {"whitespace", WHITESPACE}, {"with", WITH}, {"without", WITHOUT}, {"work", WORK}, {"write", WRITE}, + {"xmlattributes", XMLATTRIBUTES}, + {"xmlconcat", XMLCONCAT}, + {"xmlelement", XMLELEMENT}, + {"xmlforest", XMLFOREST}, + {"xmlparse", XMLPARSE}, + {"xmlpi", XMLPI}, + {"xmlroot", XMLROOT}, + {"xmlserialize", XMLSERIALIZE}, {"year", YEAR_P}, + {"yes", YES}, {"zone", ZONE}, }; diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c index 2a468c68271..5670ed4fe74 100644 --- a/src/backend/parser/parse_coerce.c +++ b/src/backend/parser/parse_coerce.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.147 2006/12/10 22:13:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.148 2006/12/21 16:05:14 petere Exp $ * *------------------------------------------------------------------------- */ @@ -919,6 +919,46 @@ coerce_to_bigint(ParseState *pstate, Node *node, return node; } +/* + * coerce_to_xml() + * Coerce an argument of a construct that requires xml input. + * Also check that input is not a set. + * + * Returns the possibly-transformed node tree. + * + * As with coerce_type, pstate may be NULL if no special unknown-Param + * processing is wanted. + */ +Node * +coerce_to_xml(ParseState *pstate, Node *node, + const char *constructName) +{ + Oid inputTypeId = exprType(node); + + if (inputTypeId != XMLOID) + { + node = coerce_to_target_type(pstate, node, inputTypeId, + XMLOID, -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST); + if (node == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + /* translator: first %s is name of a SQL construct, eg LIMIT */ + errmsg("argument of %s must be type xml, not type %s", + constructName, format_type_be(inputTypeId)))); + } + + if (expression_returns_set(node)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + /* translator: %s is name of a SQL construct, eg LIMIT */ + errmsg("argument of %s must not return a set", + constructName))); + + return node; +} + /* select_common_type() * Determine the common supertype of a list of input expression types. diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index b1b6ea81456..234a15b6afb 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.199 2006/12/10 22:13:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.200 2006/12/21 16:05:14 petere Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "parser/parse_type.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/xml.h" bool Transform_null_equals = false; @@ -55,6 +56,7 @@ static Node *transformArrayExpr(ParseState *pstate, ArrayExpr *a); static Node *transformRowExpr(ParseState *pstate, RowExpr *r); static Node *transformCoalesceExpr(ParseState *pstate, CoalesceExpr *c); static Node *transformMinMaxExpr(ParseState *pstate, MinMaxExpr *m); +static Node *transformXmlExpr(ParseState *pstate, XmlExpr *x); static Node *transformBooleanTest(ParseState *pstate, BooleanTest *b); static Node *transformColumnRef(ParseState *pstate, ColumnRef *cref); static Node *transformWholeRowRef(ParseState *pstate, char *schemaname, @@ -232,6 +234,10 @@ transformExpr(ParseState *pstate, Node *expr) result = transformBooleanTest(pstate, (BooleanTest *) expr); break; + case T_XmlExpr: + result = transformXmlExpr(pstate, (XmlExpr *) expr); + break; + /********************************************* * Quietly accept node types that may be presented when we are * called on an already-transformed tree. @@ -1409,6 +1415,56 @@ transformBooleanTest(ParseState *pstate, BooleanTest *b) return (Node *) b; } +static Node * +transformXmlExpr(ParseState *pstate, XmlExpr *x) +{ + ListCell *lc; + XmlExpr *newx = makeNode(XmlExpr); + + newx->op = x->op; + if (x->name) + newx->name = map_sql_identifier_to_xml_name(x->name, false); + else + newx->name = NULL; + + foreach(lc, x->named_args) + { + ResTarget *r = (ResTarget *) lfirst(lc); + Node *expr = transformExpr(pstate, r->val); + char *argname = NULL; + + if (r->name) + argname = map_sql_identifier_to_xml_name(r->name, false); + else if (IsA(r->val, ColumnRef)) + argname = map_sql_identifier_to_xml_name(FigureColname(r->val), true); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + x->op == IS_XMLELEMENT + ? errmsg("unnamed attribute value must be a column reference") + : errmsg("unnamed element value must be a column reference"))); + + newx->named_args = lappend(newx->named_args, + makeTargetEntry((Expr *) expr, 0, argname, false)); + } + + foreach(lc, x->args) + { + Node *e = (Node *) lfirst(lc); + Node *newe; + + newe = coerce_to_xml(pstate, transformExpr(pstate, e), + (x->op == IS_XMLCONCAT + ? "XMLCONCAT" + : (x->op == IS_XMLELEMENT + ? "XMLELEMENT" + : "XMLFOREST"))); + newx->args = lappend(newx->args, newe); + } + + return (Node *) newx; +} + /* * Construct a whole-row reference to represent the notation "relation.*". * @@ -1668,6 +1724,9 @@ exprType(Node *expr) case T_BooleanTest: type = BOOLOID; break; + case T_XmlExpr: + type = XMLOID; + break; case T_CoerceToDomain: type = ((CoerceToDomain *) expr)->resulttype; break; diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index bb4b065eebb..906d96e45c6 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_target.c,v 1.149 2006/10/04 00:29:56 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_target.c,v 1.150 2006/12/21 16:05:14 petere Exp $ * *------------------------------------------------------------------------- */ @@ -1315,6 +1315,21 @@ FigureColnameInternal(Node *node, char **name) return 2; } break; + case T_XmlExpr: + /* make SQL/XML functions act like a regular function */ + switch (((XmlExpr*) node)->op) + { + case IS_XMLCONCAT: + *name = "xmlconcat"; + return 2; + case IS_XMLELEMENT: + *name = "xmlelement"; + return 2; + case IS_XMLFOREST: + *name = "xmlforest"; + return 2; + } + break; default: break; } diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 5a1996c3439..11a03f31857 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -1,7 +1,7 @@ # # Makefile for utils/adt # -# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.60 2006/04/05 22:11:55 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.61 2006/12/21 16:05:15 petere Exp $ # subdir = src/backend/utils/adt @@ -25,7 +25,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \ tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ network.o mac.o inet_net_ntop.o inet_net_pton.o \ ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \ - ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o + ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o xml.o like.o: like.c like_match.c diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 9d9404bde4c..a99942010b6 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -2,7 +2,7 @@ * ruleutils.c - Functions to convert stored expressions/querytrees * back to source text * - * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.235 2006/11/10 22:59:29 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.236 2006/12/21 16:05:15 petere Exp $ **********************************************************************/ #include "postgres.h" @@ -2988,6 +2988,7 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) case T_CoalesceExpr: case T_MinMaxExpr: case T_NullIfExpr: + case T_XmlExpr: case T_Aggref: case T_FuncExpr: /* function-like: name(..) or name[..] */ @@ -3096,6 +3097,7 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) case T_CoalesceExpr: /* own parentheses */ case T_MinMaxExpr: /* own parentheses */ case T_NullIfExpr: /* other separators */ + case T_XmlExpr: /* own parentheses */ case T_Aggref: /* own parentheses */ case T_CaseExpr: /* other separators */ return true; @@ -3144,6 +3146,7 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags) case T_CoalesceExpr: /* own parentheses */ case T_MinMaxExpr: /* own parentheses */ case T_NullIfExpr: /* other separators */ + case T_XmlExpr: /* own parentheses */ case T_Aggref: /* own parentheses */ case T_CaseExpr: /* other separators */ return true; @@ -3845,6 +3848,28 @@ get_rule_expr(Node *node, deparse_context *context, } break; + case T_XmlExpr: + { + XmlExpr *xexpr = (XmlExpr *) node; + + switch (xexpr->op) + { + case IS_XMLCONCAT: + appendStringInfo(buf, "XMLCONCAT("); + break; + case IS_XMLELEMENT: + appendStringInfo(buf, "XMLELEMENT("); + break; + case IS_XMLFOREST: + appendStringInfo(buf, "XMLFOREST("); + break; + } + get_rule_expr((Node *) xexpr->named_args, context, true); + get_rule_expr((Node *) xexpr->args, context, true); + appendStringInfoChar(buf, ')'); + } + break; + case T_CoerceToDomain: { CoerceToDomain *ctest = (CoerceToDomain *) node; diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c new file mode 100644 index 00000000000..8997730fc8d --- /dev/null +++ b/src/backend/utils/adt/xml.c @@ -0,0 +1,942 @@ +/*------------------------------------------------------------------------- + * + * xml.c + * XML data type support. + * + * + * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.1 2006/12/21 16:05:15 petere Exp $ + * + *------------------------------------------------------------------------- + */ + +/* + * Generally, XML type support is only available when libxml use was + * configured during the build. But even if that is not done, the + * type and all the functions are available, but most of them will + * fail. For one thing, this avoids having to manage variant catalog + * installations. But it also has nice effects such as that you can + * dump a database containing XML type data even if the server is not + * linked with libxml. + */ + +#include "postgres.h" + +#ifdef USE_LIBXML +#include <libxml/chvalid.h> +#include <libxml/parser.h> +#include <libxml/tree.h> +#include <libxml/uri.h> +#include <libxml/xmlerror.h> +#endif /* USE_LIBXML */ + +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "nodes/execnodes.h" +#include "utils/builtins.h" +#include "utils/xml.h" + + +#ifdef USE_LIBXML + +/* + * A couple of useful macros (similar to ones from libxml/parse.c) + */ +#define CMP4( s, c1, c2, c3, c4 ) \ + ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ + ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) +#define CMP5( s, c1, c2, c3, c4, c5 ) \ + ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) + +#define PG_XML_DEFAULT_URI "dummy.xml" +#define XML_ERRBUF_SIZE 200 + + +static void xml_init(void); +static void *xml_palloc(size_t size); +static void *xml_repalloc(void *ptr, size_t size); +static void xml_pfree(void *ptr); +static char *xml_pstrdup(const char *string); +static void xml_ereport(int level, char *msg, void *ctxt); +static void xml_errorHandler(void *ctxt, const char *msg, ...); +static void xml_ereport_by_code(int level, char *msg, int errcode); +static xmlChar *xml_text2xmlChar(text *in); +static xmlDocPtr xml_parse(text *data, int opts, bool is_document); + + +/* Global variables */ +/* taken from contrib/xml2 */ +/* FIXME: DO NOT USE global vars !!! */ +char *xml_errbuf; /* per line error buffer */ +char *xml_errmsg = NULL; /* overall error message */ + +#endif /* USE_LIBXML */ + + +#define NO_XML_SUPPORT() ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("no XML support in this installation"))) + + +Datum +xml_in(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + char *s = PG_GETARG_CSTRING(0); + size_t len; + xmltype *vardata; + + len = strlen(s); + vardata = palloc(len + VARHDRSZ); + VARATT_SIZEP(vardata) = len + VARHDRSZ; + memcpy(VARDATA(vardata), s, len); + + /* + * Parse the data to check if it is well-formed XML data. Assume + * that ERROR occurred if parsing failed. Do we need DTD + * validation (if DTD exists)? + */ + xml_parse(vardata, XML_PARSE_DTDATTR | XML_PARSE_DTDVALID, false); + + PG_RETURN_XML_P(vardata); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xml_out(PG_FUNCTION_ARGS) +{ + xmltype *s = PG_GETARG_XML_P(0); + char *result; + int32 len; + + len = VARSIZE(s) - VARHDRSZ; + result = palloc(len + 1); + memcpy(result, VARDATA(s), len); + result[len] = '\0'; + + PG_RETURN_CSTRING(result); +} + + +#ifdef USE_LIBXML +static void +appendStringInfoText(StringInfo str, const text *t) +{ + appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ); +} + + +static xmltype * +stringinfo_to_xmltype(StringInfo buf) +{ + int32 len; + xmltype *result; + + len = buf->len + VARHDRSZ; + result = palloc(len); + VARATT_SIZEP(result) = len; + memcpy(VARDATA(result), buf->data, buf->len); + + return result; +} +#endif + + +Datum +xmlcomment(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *arg = PG_GETARG_TEXT_P(0); + int len = VARATT_SIZEP(arg) - VARHDRSZ; + StringInfoData buf; + int i; + + /* check for "--" in string or "-" at the end */ + for (i = 1; i < len; i++) + if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-') + || (VARDATA(arg)[i] == '-' && i == len - 1)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_COMMENT), + errmsg("invalid XML comment"))); + + initStringInfo(&buf); + appendStringInfo(&buf, "<!--"); + appendStringInfoText(&buf, arg); + appendStringInfo(&buf, "-->"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xmlparse(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data; + bool is_document; + bool preserve_whitespace; + + data = PG_GETARG_TEXT_P(0); + + if (PG_NARGS() >= 2) + is_document = PG_GETARG_BOOL(1); + else + is_document = false; + + if (PG_NARGS() >= 3) + preserve_whitespace = PG_GETARG_BOOL(2); + else + /* + * Since the XMLPARSE grammar makes STRIP WHITESPACE the + * default, this argument should really default to false. But + * until we have actually implemented whitespace stripping, + * this would be annoying. + */ + preserve_whitespace = true; + + if (!preserve_whitespace) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("XMLPARSE with STRIP WHITESPACE is not implemented"))); + + /* + * Note, that here we try to apply DTD defaults + * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d: 'Default + * valies defined by internal DTD are applied'. As for external + * DTDs, we try to support them too, (see SQL/XML:10.16.7.e) + */ + xml_parse(data, XML_PARSE_DTDATTR, is_document); /* assume that ERROR occurred if parsing failed */ + + PG_RETURN_XML_P(data); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xmlpi(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + char *target = NameStr(*PG_GETARG_NAME(0)); + StringInfoData buf; + + if (strlen(target) >= 3 + && (target[0] == 'x' || target[0] == 'X') + && (target[1] == 'm' || target[1] == 'M') + && (target[2] == 'l' || target[2] == 'L')) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction target name cannot start with \"xml\"."))); + } + + initStringInfo(&buf); + + appendStringInfo(&buf, "<?"); + appendStringInfoString(&buf, map_sql_identifier_to_xml_name(target, false)); + if (PG_NARGS() > 1) + { + text *arg = PG_GETARG_TEXT_P(1); + char *string; + + string = DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(arg))); + if (strstr(string, "?>")) + ereport(ERROR, + (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION), + errmsg("invalid XML processing instruction"), + errdetail("XML processing instruction cannot contain \"?>\"."))); + + appendStringInfoString(&buf, " "); + appendStringInfoString(&buf, string); + } + appendStringInfoString(&buf, "?>"); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +Datum +xmlroot(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + xmltype *data; + text *version; + int standalone; + StringInfoData buf; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + else + data = PG_GETARG_XML_P(0); + + if (PG_ARGISNULL(1)) + version = NULL; + else + version = PG_GETARG_TEXT_P(1); + + if (PG_ARGISNULL(2)) + standalone = 0; + else + { + bool tmp = PG_GETARG_BOOL(2); + standalone = (tmp ? 1 : -1); + } + + /* + * FIXME: This is probably supposed to be cleverer if there + * already is an XML preamble. + */ + initStringInfo(&buf); + + appendStringInfo(&buf,"<?xml"); + if (version) { + appendStringInfo(&buf, " version=\""); + appendStringInfoText(&buf, version); + appendStringInfo(&buf, "\""); + } + if (standalone) + appendStringInfo(&buf, " standalone=\"%s\"", (standalone == 1 ? "yes" : "no")); + appendStringInfo(&buf, "?>"); + appendStringInfoText(&buf, (text *) data); + + PG_RETURN_XML_P(stringinfo_to_xmltype(&buf)); +#else + NO_XML_SUPPORT(); + return 0; +#endif +} + + +/* + * Validate document (given as string) against DTD (given as external link) + * TODO !!! use text instead of cstring for second arg + * TODO allow passing DTD as a string value (not only as an URI) + * TODO redesign (see comment with '!!!' below) + */ +Datum +xmlvalidate(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXML + text *data = PG_GETARG_TEXT_P(0); + text *dtdOrUri = PG_GETARG_TEXT_P(1); + bool result = FALSE; + xmlParserCtxtPtr ctxt; /* the parser context */ + xmlDocPtr doc; /* the resulting document tree */ + xmlDtdPtr dtd; + + xml_init(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + xml_ereport(ERROR, "could not allocate parser context", ctxt); + doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data), + VARSIZE(data) - VARHDRSZ, PG_XML_DEFAULT_URI, NULL, 0); + if (doc == NULL) + xml_ereport(ERROR, "could not parse XML data", ctxt); + +#if 0 + uri = xmlCreateURI(); + ereport(NOTICE, (errcode(0),errmsg(" dtd - %s", dtdOrUri))); + dtd = palloc(sizeof(xmlDtdPtr)); + uri = xmlParseURI(dtdOrUri); + if (uri == NULL) + xml_ereport(ERROR, "not implemented yet... (TODO)", ctxt); + else +#endif + dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri)); + + if (dtd == NULL) + { +#if 0 + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); +#endif + xml_ereport(ERROR, "could not load DTD", ctxt); + } + + if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1) + result = TRUE; + +#if 0 + xmlFreeURI(uri); + xmlFreeDtd(dtd); + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); +#endif + + if (!result) + xml_ereport(NOTICE, "validation against DTD failed", ctxt); + + PG_RETURN_BOOL(result); +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return 0; +#endif /* not USE_LIBXML */ +} + + +#ifdef USE_LIBXML + +/* + * Container for some init stuff (not good design!) + * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check) + */ +static void +xml_init(void) +{ + /* + * Currently, we have no pure UTF-8 support for internals -- check + * if we can work. + */ + if (sizeof (char) != sizeof (xmlChar)) + ereport(ERROR, + (errmsg("cannot initialize XML library"), + errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.", + sizeof(char), sizeof(xmlChar)))); + + xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup); + xmlInitParser(); + LIBXML_TEST_VERSION; + /* do not flood PG's logfile with libxml error messages - reset error handler*/ + xmlSetGenericErrorFunc(NULL, xml_errorHandler); + xml_errmsg = NULL; + xml_errbuf = palloc(XML_ERRBUF_SIZE); + memset(xml_errbuf, 0, XML_ERRBUF_SIZE); +} + + +/* + * Convert a C string to XML internal representation + * (same things as for TEXT, but with checking the data for well-formedness + * and, moreover, validation against DTD, if needed). + * NOTICE: We use TEXT type as internal storage type. In the future, + * we plan to create own storage type (maybe several types/strategies) + * TODO predefined DTDs / XSDs and validation + * TODO validation against XML Schema + * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c) + * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below) + */ +static xmlDocPtr +xml_parse(text *data, int opts, bool is_document) +{ + bool validationFailed = FALSE; + xmlParserCtxtPtr ctxt; /* the parser context */ + xmlDocPtr doc; /* the resulting document tree */ + int res_code; + int32 len; + xmlChar *string; +#ifdef XML_DEBUG_DTD_CONST + xmlDtdPtr dtd; /* pointer to DTD */ +#endif + + xml_init(); + + len = VARSIZE(data) - VARHDRSZ; /* will be useful later */ + string = xml_text2xmlChar(data); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + xml_ereport(ERROR, "could not allocate parser context", ctxt); + + /* first, we try to parse the string as it is XML doc, then, as XML chunk */ + ereport(DEBUG3, (errmsg("string to parse: %s", string))); + if (len > 4 && CMP5(string, '<', '?', 'x', 'm', 'l')) + { + /* consider it as DOCUMENT */ + doc = xmlCtxtReadMemory(ctxt, string, len, PG_XML_DEFAULT_URI, NULL, opts); + if (doc == NULL) + { + xml_ereport(ERROR, "could not parse XML data", ctxt); +#if 0 + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + ereport(ERROR, (errmsg("could not parse XML data"))); +#endif + } + } + else + { + /* attempt to parse the string as if it is an XML fragment */ + ereport(DEBUG3, (errmsg("the string is not an XML doc, trying to parse as a CHUNK"))); + doc = xmlNewDoc(NULL); + /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */ + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL); + if (res_code != 0) + { + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + xml_ereport_by_code(ERROR, "could not parse XML data", res_code); + } + } + +#ifdef XML_DEBUG_DTD_CONST + dtd = xmlParseDTD(NULL, (xmlChar *) XML_DEBUG_DTD_CONST); + xml_ereport(DEBUG3, "solid path to DTD was defined for debugging purposes", ctxt); + if (dtd == NULL) + { + xml_ereport(ERROR, "could not parse DTD data", ctxt); + } + else +#else + /* if dtd for our xml data is detected... */ + if ((doc->intSubset != NULL) || (doc->extSubset != NULL)) +#endif + { + /* assume that inline DTD exists - validation should be performed */ +#ifdef XML_DEBUG_DTD_CONST + if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) != 1) +#else + if (ctxt->valid == 0) +#endif + { + /* DTD exists, but validator reported 'validation failed' */ + validationFailed = TRUE; + } + } + + if (validationFailed) + xml_ereport(WARNING, "validation against DTD failed", ctxt); + + /* TODO encoding issues + * (thoughts: + * CASE: + * - XML data has explicit encoding attribute in its prolog + * - if not, assume that enc. of XML data is the same as client's one + * + * The common rule is to accept the XML data only if its encoding + * is the same as encoding of the storage (server's). The other possible + * option is to accept all the docs, but DO TRANSFORMATION and, if needed, + * change the prolog. + * + * I think I'd stick the first way (for the 1st version), + * it's much simplier (less errors...) + * ) */ + /* ... */ + + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + + ereport(DEBUG3, (errmsg("XML data successfully parsed, encoding: %s", + (char *) doc->encoding))); + + return doc; +} + + +/* + * xmlChar<->text convertions + */ +static xmlChar * +xml_text2xmlChar(text *in) +{ + int32 len = VARSIZE(in) - VARHDRSZ; + xmlChar *res; + + res = palloc(len + 1); + memcpy(res, VARDATA(in), len); + res[len] = '\0'; + + return(res); +} + + +/* + * Wrappers for memory management functions + */ +static void * +xml_palloc(size_t size) +{ + return palloc(size); +} + + +static void * +xml_repalloc(void *ptr, size_t size) +{ + return repalloc(ptr, size); +} + + +static void +xml_pfree(void *ptr) +{ + pfree(ptr); +} + + +static char * +xml_pstrdup(const char *string) +{ + return pstrdup(string); +} + + +/* + * Wrapper for "ereport" function. + * Adds detail - libxml's native error message, if any. + */ +static void +xml_ereport(int level, char *msg, void *ctxt) +{ + char *xmlErrDetail; + int xmlErrLen, i; + xmlErrorPtr libxmlErr = NULL; + + if (xml_errmsg != NULL) + { + ereport(DEBUG1, (errmsg("%s", xml_errmsg))); + pfree(xml_errmsg); + } + + if (ctxt != NULL) + libxmlErr = xmlCtxtGetLastError(ctxt); + + if (libxmlErr == NULL) + { + if (level == ERROR) + { + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + } + ereport(level, (errmsg(msg))); + } + else + { + /* as usual, libxml error message contains '\n'; get rid of it */ + xmlErrLen = strlen(libxmlErr->message); /* - 1; */ + xmlErrDetail = (char *) palloc(xmlErrLen); + for (i = 0; i < xmlErrLen; i++) + { + if (libxmlErr->message[i] == '\n') + xmlErrDetail[i] = '.'; + else + xmlErrDetail[i] = libxmlErr->message[i]; + } + if (level == ERROR) + { + xmlFreeParserCtxt(ctxt); + xmlCleanupParser(); + } + ereport(level, (errmsg(msg), errdetail("%s", xmlErrDetail))); + } +} + + +/* + * Error handler for libxml error messages + */ +static void +xml_errorHandler(void *ctxt, const char *msg,...) +{ + va_list args; + + va_start(args, msg); + vsnprintf(xml_errbuf, XML_ERRBUF_SIZE, msg, args); + va_end(args); + /* Now copy the argument across */ + if (xml_errmsg == NULL) + xml_errmsg = pstrdup(xml_errbuf); + else + { + int32 xsize = strlen(xml_errmsg); + + xml_errmsg = repalloc(xml_errmsg, (size_t) (xsize + strlen(xml_errbuf) + 1)); + strncpy(&xml_errmsg[xsize - 1], xml_errbuf, strlen(xml_errbuf)); + xml_errmsg[xsize + strlen(xml_errbuf) - 1] = '\0'; + } + memset(xml_errbuf, 0, XML_ERRBUF_SIZE); +} + + +/* + * Return error message by libxml error code + * TODO make them closer to recommendations from Postgres manual + */ +static void +xml_ereport_by_code(int level, char *msg, int code) +{ + const char *det; + + if (code < 0) + { + ereport(level, (errmsg(msg))); + return; + } + + switch (code) { + case XML_ERR_INTERNAL_ERROR: + det = "libxml internal error"; + break; + case XML_ERR_ENTITY_LOOP: + det = "Detected an entity reference loop"; + break; + case XML_ERR_ENTITY_NOT_STARTED: + det = "EntityValue: \" or ' expected"; + break; + case XML_ERR_ENTITY_NOT_FINISHED: + det = "EntityValue: \" or ' expected"; + break; + case XML_ERR_ATTRIBUTE_NOT_STARTED: + det = "AttValue: \" or ' expected"; + break; + case XML_ERR_LT_IN_ATTRIBUTE: + det = "Unescaped '<' not allowed in attributes values"; + break; + case XML_ERR_LITERAL_NOT_STARTED: + det = "SystemLiteral \" or ' expected"; + break; + case XML_ERR_LITERAL_NOT_FINISHED: + det = "Unfinished System or Public ID \" or ' expected"; + break; + case XML_ERR_MISPLACED_CDATA_END: + det = "Sequence ']]>' not allowed in content"; + break; + case XML_ERR_URI_REQUIRED: + det = "SYSTEM or PUBLIC, the URI is missing"; + break; + case XML_ERR_PUBID_REQUIRED: + det = "PUBLIC, the Public Identifier is missing"; + break; + case XML_ERR_HYPHEN_IN_COMMENT: + det = "Comment must not contain '--' (double-hyphen)"; + break; + case XML_ERR_PI_NOT_STARTED: + det = "xmlParsePI : no target name"; + break; + case XML_ERR_RESERVED_XML_NAME: + det = "Invalid PI name"; + break; + case XML_ERR_NOTATION_NOT_STARTED: + det = "NOTATION: Name expected here"; + break; + case XML_ERR_NOTATION_NOT_FINISHED: + det = "'>' required to close NOTATION declaration"; + break; + case XML_ERR_VALUE_REQUIRED: + det = "Entity value required"; + break; + case XML_ERR_URI_FRAGMENT: + det = "Fragment not allowed"; + break; + case XML_ERR_ATTLIST_NOT_STARTED: + det = "'(' required to start ATTLIST enumeration"; + break; + case XML_ERR_NMTOKEN_REQUIRED: + det = "NmToken expected in ATTLIST enumeration"; + break; + case XML_ERR_ATTLIST_NOT_FINISHED: + det = "')' required to finish ATTLIST enumeration"; + break; + case XML_ERR_MIXED_NOT_STARTED: + det = "MixedContentDecl : '|' or ')*' expected"; + break; + case XML_ERR_PCDATA_REQUIRED: + det = "MixedContentDecl : '#PCDATA' expected"; + break; + case XML_ERR_ELEMCONTENT_NOT_STARTED: + det = "ContentDecl : Name or '(' expected"; + break; + case XML_ERR_ELEMCONTENT_NOT_FINISHED: + det = "ContentDecl : ',' '|' or ')' expected"; + break; + case XML_ERR_PEREF_IN_INT_SUBSET: + det = "PEReference: forbidden within markup decl in internal subset"; + break; + case XML_ERR_GT_REQUIRED: + det = "Expected '>'"; + break; + case XML_ERR_CONDSEC_INVALID: + det = "XML conditional section '[' expected"; + break; + case XML_ERR_EXT_SUBSET_NOT_FINISHED: + det = "Content error in the external subset"; + break; + case XML_ERR_CONDSEC_INVALID_KEYWORD: + det = "conditional section INCLUDE or IGNORE keyword expected"; + break; + case XML_ERR_CONDSEC_NOT_FINISHED: + det = "XML conditional section not closed"; + break; + case XML_ERR_XMLDECL_NOT_STARTED: + det = "Text declaration '<?xml' required"; + break; + case XML_ERR_XMLDECL_NOT_FINISHED: + det = "parsing XML declaration: '?>' expected"; + break; + case XML_ERR_EXT_ENTITY_STANDALONE: + det = "external parsed entities cannot be standalone"; + break; + case XML_ERR_ENTITYREF_SEMICOL_MISSING: + det = "EntityRef: expecting ';'"; + break; + case XML_ERR_DOCTYPE_NOT_FINISHED: + det = "DOCTYPE improperly terminated"; + break; + case XML_ERR_LTSLASH_REQUIRED: + det = "EndTag: '</' not found"; + break; + case XML_ERR_EQUAL_REQUIRED: + det = "Expected '='"; + break; + case XML_ERR_STRING_NOT_CLOSED: + det = "String not closed expecting \" or '"; + break; + case XML_ERR_STRING_NOT_STARTED: + det = "String not started expecting ' or \""; + break; + case XML_ERR_ENCODING_NAME: + det = "Invalid XML encoding name"; + break; + case XML_ERR_STANDALONE_VALUE: + det = "Standalone accepts only 'yes' or 'no'"; + break; + case XML_ERR_DOCUMENT_EMPTY: + det = "Document is empty"; + break; + case XML_ERR_DOCUMENT_END: + det = "Extra content at the end of the document"; + break; + case XML_ERR_NOT_WELL_BALANCED: + det = "Chunk is not well balanced"; + break; + case XML_ERR_EXTRA_CONTENT: + det = "Extra content at the end of well balanced chunk"; + break; + case XML_ERR_VERSION_MISSING: + det = "Malformed declaration expecting version"; + break; + /* more err codes... Please, keep the order! */ + case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */ + det ="Attribute without value"; + break; + case XML_ERR_ATTRIBUTE_REDEFINED: + det ="Attribute defined more than once in the same element"; + break; + case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */ + det = "Comment is not finished"; + break; + case XML_ERR_NAME_REQUIRED: /* 68 */ + det = "Element name not found"; + break; + case XML_ERR_TAG_NOT_FINISHED: /* 77 */ + det = "Closing tag not found"; + break; + default: + det = "Unregistered error (libxml error code: %d)"; + ereport(DEBUG1, (errmsg("Check out \"libxml/xmlerror.h\" and bring errcode \"%d\" processing to \"xml.c\".", code))); + } + + if (xml_errmsg != NULL) + { + ereport(DEBUG1, (errmsg("%s", xml_errmsg))); + pfree(xml_errmsg); + } + + ereport(level, (errmsg(msg), errdetail(det, code))); +} + + +/* + * Convert one char in the current server encoding to a Unicode + * codepoint. + */ +static pg_wchar +sqlchar_to_unicode(unsigned char *s) +{ + int save_enc; + pg_wchar ret; + char *utf8string = pg_do_encoding_conversion(s, pg_mblen(s), GetDatabaseEncoding(), PG_UTF8); + + save_enc = GetDatabaseEncoding(); + SetDatabaseEncoding(PG_UTF8); + pg_mb2wchar_with_len(utf8string, &ret, pg_mblen(s)); + SetDatabaseEncoding(save_enc); + + return ret; +} + + +static bool +is_valid_xml_namefirst(pg_wchar c) +{ + /* (Letter | '_' | ':') */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || c == '_' || c == ':'); +} + + +static bool +is_valid_xml_namechar(pg_wchar c) +{ + /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */ + return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c) + || xmlIsDigitQ(c) + || c == '.' || c == '-' || c == '_' || c == ':' + || xmlIsCombiningQ(c) + || xmlIsExtenderQ(c)); +} +#endif /* USE_LIBXML */ + + +/* + * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1. + */ +char * +map_sql_identifier_to_xml_name(unsigned char *ident, bool fully_escaped) +{ +#ifdef USE_LIBXML + StringInfoData buf; + unsigned char *p; + + initStringInfo(&buf); + + for (p = ident; *p; p += pg_mblen(p)) + { + if (*p == ':' && (p == ident || fully_escaped)) + appendStringInfo(&buf, "_x003A_"); + else if (*p == '_' && *(p+1) == 'x') + appendStringInfo(&buf, "_x005F_"); + else if (fully_escaped && p == ident + && ( *p == 'x' || *p == 'X') + && ( *(p+1) == 'm' || *(p+1) == 'M') + && ( *(p+2) == 'l' || *(p+2) == 'L')) + { + if (*p == 'x') + appendStringInfo(&buf, "_x0078_"); + else + appendStringInfo(&buf, "_x0058_"); + } + else + { + pg_wchar u = sqlchar_to_unicode(p); + + if (!is_valid_xml_namechar(u) + || (p == ident && !is_valid_xml_namefirst(u))) + appendStringInfo(&buf, "_x%04X_", (unsigned int) u); + else + appendBinaryStringInfo(&buf, p, pg_mblen(p)); + } + } + + return buf.data; +#else /* not USE_LIBXML */ + NO_XML_SUPPORT(); + return NULL; +#endif /* not USE_LIBXML */ +} diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index e91c8a2a58c..89845b08c16 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -4,7 +4,7 @@ * (currently mule internal code (mic) is used) * Tatsuo Ishii * - * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.59 2006/10/04 00:30:02 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.60 2006/12/21 16:05:15 petere Exp $ */ #include "postgres.h" @@ -599,7 +599,7 @@ void SetDatabaseEncoding(int encoding) { if (!PG_VALID_BE_ENCODING(encoding)) - elog(ERROR, "invalid database encoding"); + elog(ERROR, "invalid database encoding: %d", encoding); DatabaseEncoding = &pg_enc2name_tbl[encoding]; Assert(DatabaseEncoding->encoding == encoding); |