diff options
Diffstat (limited to 'src/backend/utils/adt/xml.c')
-rw-r--r-- | src/backend/utils/adt/xml.c | 558 |
1 files changed, 551 insertions, 7 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index e8bce3b806d..f2e5224fc3f 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -73,6 +73,7 @@
 #include "commands/dbcommands.h"
 #include "executor/executor.h"
 #include "executor/spi.h"
+#include "executor/tablefunc.h"
 #include "fmgr.h"
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
@@ -145,6 +146,7 @@ static text *xml_xmlnodetoxmltype(xmlNodePtr cur,
 					 PgXmlErrorContext *xmlerrcxt);
 static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
 					   ArrayBuildState *astate,
 					   PgXmlErrorContext *xmlerrcxt);
+static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
 #endif   /* USE_LIBXML */
 
 static StringInfo query_to_xml_internal(const char *query, char *tablename,
@@ -165,6 +167,49 @@ static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
 						  char *tablename, bool nulls, bool tableforest,
 						  const char *targetns, bool top_level);
 
+/* XMLTABLE support */
+#ifdef USE_LIBXML
+/* random number to identify a valid XmlTableBuilderData */
+#define XMLTABLE_CONTEXT_MAGIC	46922182
+
+/*
+ * Private state of the XMLTABLE builder, kept in TableFuncScanState->opaque.
+ * It is created by XmlTableInitOpaque and torn down by XmlTableDestroyOpaque.
+ */
+typedef struct XmlTableBuilderData
+{
+	int			magic;			/* XMLTABLE_CONTEXT_MAGIC while the struct is valid */
+	int			natts;			/* number of entries in xpathscomp[] */
+	long int	row_count;		/* rows handed out so far; current row is row_count - 1 */
+	PgXmlErrorContext *xmlerrcxt;	/* from pg_xml_init(), released by pg_xml_done() */
+	xmlParserCtxtPtr ctxt;		/* libxml2 parser context */
+	xmlDocPtr	doc;			/* parsed input document */
+	xmlXPathContextPtr xpathcxt;	/* XPath context bound to doc */
+	xmlXPathCompExprPtr xpathcomp;	/* compiled row-filter expression */
+	xmlXPathObjectPtr xpathobj; /* result of evaluating the row filter */
+	xmlXPathCompExprPtr *xpathscomp;	/* compiled column-filter expressions */
+} XmlTableBuilderData;
+#endif
+
+static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
+static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
+static void XmlTableSetNamespace(struct TableFuncScanState *state, char *name,
+					 char *uri);
+static void XmlTableSetRowFilter(struct TableFuncScanState *state, char *path);
+static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
+						char *path, int colnum);
+static bool XmlTableFetchRow(struct TableFuncScanState *state);
+static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
+				 Oid typid, int32 typmod, bool *isnull);
+static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
+
+/* Callback table through which the executor drives the XMLTABLE builder */
+const TableFuncRoutine XmlTableRoutine =
+{
+	XmlTableInitOpaque,
+	XmlTableSetDocument,
+	XmlTableSetNamespace,
+	XmlTableSetRowFilter,
+	XmlTableSetColumnFilter,
+	XmlTableFetchRow,
+	XmlTableGetValue,
+	XmlTableDestroyOpaque
+};
+
 #define NO_XML_SUPPORT() \
 	ereport(ERROR, \
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
@@ -1113,6 +1158,19 @@ xml_pnstrdup(const xmlChar *str, size_t len)
 	return result;
 }
 
+/*
+ * Ditto, except input is char*
+ *
+ * Copies the first 'len' bytes of 'str' into a freshly palloc'd buffer of
+ * len + 1 xmlChars and NUL-terminates it.  The input is only read, so it is
+ * taken as a pointer to const.
+ */
+static xmlChar *
+pg_xmlCharStrndup(const char *str, size_t len)
+{
+	xmlChar    *result;
+
+	result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+	memcpy(result, str, len);
+	result[len] = '\0';
+
+	return result;
+}
+
 /*
  * str is the null-terminated input string.  Remaining arguments are
  * output arguments; each can be NULL if value is not wanted.
@@ -3811,13 +3869,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
 				(errcode(ERRCODE_DATA_EXCEPTION),
 				 errmsg("empty XPath expression")));
 
-	string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
-	memcpy(string, datastr, len);
-	string[len] = '\0';
-
-	xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
-	memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
-	xpath_expr[xpath_len] = '\0';
+	string = pg_xmlCharStrndup(datastr, len);
+	xpath_expr = pg_xmlCharStrndup(VARDATA(xpath_expr_text), xpath_len);
 
 	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
@@ -4065,3 +4118,494 @@ xml_is_well_formed_content(PG_FUNCTION_ARGS)
 	return 0;
 #endif   /* not USE_LIBXML */
 }
+
+/*
+ * support functions for XMLTABLE
+ *
+ */
+#ifdef USE_LIBXML
+
+/*
+ * Returns private data from executor state.  Ensure validity by check with
+ * MAGIC number.
+ */
+static inline XmlTableBuilderData *
+GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
+{
+	XmlTableBuilderData *result;
+
+	if (!IsA(state, TableFuncScanState))
+		elog(ERROR, "%s called with invalid TableFuncScanState", fname);
+	result = (XmlTableBuilderData *) state->opaque;
+
+	/*
+	 * Test for NULL before reading the magic number: XmlTableDestroyOpaque
+	 * resets state->opaque to NULL, so a call arriving after destruction (or
+	 * before initialization) must raise the error below instead of
+	 * dereferencing a NULL pointer.
+	 */
+	if (result == NULL || result->magic != XMLTABLE_CONTEXT_MAGIC)
+		elog(ERROR, "%s called with invalid TableFuncScanState", fname);
+
+	return result;
+}
+#endif
+
+/*
+ * XmlTableInitOpaque
+ *		Fill in TableFuncScanState->opaque for XmlTable processor; initialize
+ *		the XML parser.
+ *
+ * 'natts' is the number of columns; it sizes the array of compiled
+ * column-filter expressions.
+ *
+ * Note: Because we call pg_xml_init() here and pg_xml_done() in
+ * XmlTableDestroyOpaque, it is critical for robustness that no other
+ * executor nodes run until this node is processed to completion.  Caller
+ * must execute this to completion (probably filling a tuplestore to exhaust
+ * this node in a single pass) instead of using row-per-call mode.
+ */
+static void
+XmlTableInitOpaque(TableFuncScanState *state, int natts)
+{
+#ifdef USE_LIBXML
+	/* volatile: assigned inside PG_TRY and read in PG_CATCH */
+	volatile xmlParserCtxtPtr ctxt = NULL;
+	XmlTableBuilderData *xtCxt;
+	PgXmlErrorContext *xmlerrcxt;
+
+	/* palloc0 leaves every libxml2 pointer NULL until it is really set */
+	xtCxt = palloc0(sizeof(XmlTableBuilderData));
+	xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
+	xtCxt->natts = natts;
+	xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
+
+	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+	PG_TRY();
+	{
+		xmlInitParser();
+
+		ctxt = xmlNewParserCtxt();
+		if (ctxt == NULL || xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+						"could not allocate parser context");
+	}
+	PG_CATCH();
+	{
+		/* Undo what was set up so far, then pass the error on */
+		if (ctxt != NULL)
+			xmlFreeParserCtxt(ctxt);
+
+		pg_xml_done(xmlerrcxt, true);
+
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	xtCxt->xmlerrcxt = xmlerrcxt;
+	xtCxt->ctxt = ctxt;
+
+	state->opaque = xtCxt;
+#else
+	NO_XML_SUPPORT();
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetDocument
+ *		Install the input document
+ *
+ * Parses 'value' (an xml datum) and creates the XPath context whose initial
+ * context node is the document's root element.
+ */
+static void
+XmlTableSetDocument(TableFuncScanState *state, Datum value)
+{
+#ifdef USE_LIBXML
+	XmlTableBuilderData *xtCxt;
+	xmltype    *xmlval = DatumGetXmlP(value);
+	char	   *str;
+	xmlChar    *xstr;
+	int			length;
+	/* volatile: assigned inside PG_TRY and read in PG_CATCH */
+	volatile xmlDocPtr doc = NULL;
+	volatile xmlXPathContextPtr xpathcxt = NULL;
+
+	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
+
+	/*
+	 * Use out function for casting to string (remove encoding property). See
+	 * comment in xml_out.
+	 */
+	str = xml_out_internal(xmlval, 0);
+
+	length = strlen(str);
+	xstr = pg_xmlCharStrndup(str, length);
+
+	PG_TRY();
+	{
+		doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
+		if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
+			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+						"could not parse XML document");
+		xpathcxt = xmlXPathNewContext(doc);
+		if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
+			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+						"could not allocate XPath context");
+		xpathcxt->node = xmlDocGetRootElement(doc);
+		if (xpathcxt->node == NULL || xtCxt->xmlerrcxt->err_occurred)
+			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not find root XML element");
+	}
+	PG_CATCH();
+	{
+		/*
+		 * Nothing has been stored in xtCxt yet, so these objects must be
+		 * released here.
+		 */
+		if (xpathcxt != NULL)
+			xmlXPathFreeContext(xpathcxt);
+		if (doc != NULL)
+			xmlFreeDoc(doc);
+
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	/* From here on XmlTableDestroyOpaque is responsible for freeing these */
+	xtCxt->doc = doc;
+	xtCxt->xpathcxt = xpathcxt;
+#else
+	NO_XML_SUPPORT();
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetNamespace
+ *		Add a namespace declaration
+ *
+ * Registers prefix 'name' for namespace 'uri' in the XPath context.  A NULL
+ * 'name' stands for the DEFAULT namespace, which is rejected.
+ */
+static void
+XmlTableSetNamespace(TableFuncScanState *state, char *name, char *uri)
+{
+#ifdef USE_LIBXML
+	XmlTableBuilderData *xtCxt;
+
+	if (name == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("DEFAULT namespace is not supported")));
+	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
+
+	/* xmlXPathRegisterNs reports failure with a nonzero result */
+	if (xmlXPathRegisterNs(xtCxt->xpathcxt,
+						   pg_xmlCharStrndup(name, strlen(name)),
+						   pg_xmlCharStrndup(uri, strlen(uri))))
+		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
+					"could not set XML namespace");
+#else
+	NO_XML_SUPPORT();
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetRowFilter
+ *		Install the row-filter Xpath expression.
+ *
+ * 'path' must be a non-empty string; it is compiled here and evaluated by
+ * the first XmlTableFetchRow call.
+ */
+static void
+XmlTableSetRowFilter(TableFuncScanState *state, char *path)
+{
+#ifdef USE_LIBXML
+	XmlTableBuilderData *xtCxt;
+	xmlChar    *xstr;
+
+	/* Same argument check as XmlTableSetColumnFilter; path is dereferenced below */
+	AssertArg(PointerIsValid(path));
+
+	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
+
+	if (*path == '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("row path filter must not be empty string")));
+
+	xstr = pg_xmlCharStrndup(path, strlen(path));
+
+	xtCxt->xpathcomp = xmlXPathCompile(xstr);
+	if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
+		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
+					"invalid XPath expression");
+#else
+	NO_XML_SUPPORT();
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetColumnFilter
+ *		Install the column-filter Xpath expression, for the given column.
+ *
+ * 'colnum' is used directly as an index into the xpathscomp[] array that
+ * XmlTableInitOpaque sized with 'natts' entries.
+ */
+static void
+XmlTableSetColumnFilter(TableFuncScanState *state, char *path, int colnum)
+{
+#ifdef USE_LIBXML
+	XmlTableBuilderData *xtCxt;
+	xmlChar    *xstr;
+
+	AssertArg(PointerIsValid(path));
+
+	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
+
+	if (*path == '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("column path filter must not be empty string")));
+
+	xstr = pg_xmlCharStrndup(path, strlen(path));
+
+	xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
+	if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
+		xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
+					"invalid XPath expression");
+#else
+	NO_XML_SUPPORT();
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableFetchRow
+ *		Prepare the next "current" tuple for upcoming GetValue calls.
+ *		Returns FALSE if the row-filter expression returned no more rows.
+ */
+static bool
+XmlTableFetchRow(TableFuncScanState *state)
+{
+#ifdef USE_LIBXML
+	XmlTableBuilderData *builder;
+	xmlXPathObjectPtr rows;
+
+	builder = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
+
+	/*
+	 * XMLTABLE produces a set of rows, so our error context stays in use
+	 * across many calls.  Between two of those calls some other code may
+	 * have installed its own libxml2 error handler; the handler is a libxml2
+	 * global setting, so re-install ours before every libxml2 use that ends
+	 * by returning a value.
+	 */
+	xmlSetStructuredErrorFunc((void *) builder->xmlerrcxt, xml_errorHandler);
+
+	/* First call: evaluate the row filter once and start counting rows */
+	if (builder->xpathobj == NULL)
+	{
+		builder->xpathobj = xmlXPathCompiledEval(builder->xpathcomp,
+												 builder->xpathcxt);
+		if (builder->xpathobj == NULL || builder->xmlerrcxt->err_occurred)
+			xml_ereport(builder->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not create XPath object");
+
+		builder->row_count = 0;
+	}
+
+	rows = builder->xpathobj;
+
+	/* Only a node set with an actual node table can supply rows */
+	if (rows->type != XPATH_NODESET)
+		return false;
+	if (rows->nodesetval == NULL)
+		return false;
+
+	/* Advance to the next row; report whether it exists */
+	if (builder->row_count++ < rows->nodesetval->nodeNr)
+		return true;
+
+	return false;
+#else
+	NO_XML_SUPPORT();
+#endif   /* not USE_LIBXML */
+
+	return false;
+}
+
+/*
+ * XmlTableGetValue
+ *		Return the value for column number 'colnum' for the current row.  If
+ *		column -1 is requested, return representation of the whole row.
+ *
+ * This leaks memory, so be sure to reset often the context in which it's
+ * called.
+ */
+static Datum
+XmlTableGetValue(TableFuncScanState *state, int colnum,
+				 Oid typid, int32 typmod, bool *isnull)
+{
+#ifdef USE_LIBXML
+	XmlTableBuilderData *xtCxt;
+	Datum		result = (Datum) 0;
+	xmlNodePtr	cur;
+	char	   *cstr = NULL;
+	/* volatile: assigned inside PG_TRY and read in PG_CATCH */
+	volatile xmlXPathObjectPtr xpathobj = NULL;
+
+	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
+
+	/* XmlTableFetchRow must have returned true for the current row */
+	Assert(xtCxt->xpathobj &&
+		   xtCxt->xpathobj->type == XPATH_NODESET &&
+		   xtCxt->xpathobj->nodesetval != NULL);
+
+	/* Propagate context related error context to libxml2 */
+	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
+
+	*isnull = false;
+
+	/* XmlTableFetchRow already advanced row_count past the current row */
+	cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
+
+	/*
+	 * colnum is used directly as an array index here and below, so it has to
+	 * be a column for which XmlTableSetColumnFilter was called.
+	 */
+	Assert(xtCxt->xpathscomp[colnum] != NULL);
+
+	PG_TRY();
+	{
+		/* Set current node as entry point for XPath evaluation */
+		xmlXPathSetContextNode(cur, xtCxt->xpathcxt);
+
+		/* Evaluate column path */
+		xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
+		if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
+			xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not create XPath object");
+
+		/*
+		 * There are four possible cases, depending on the number of nodes
+		 * returned by the XPath expression and the type of the target column:
+		 * a) XPath returns no nodes.  b) One node is returned, and column is
+		 * of type XML.  c) One node, column type other than XML.  d) Multiple
+		 * nodes are returned.
+		 */
+		if (xpathobj->type == XPATH_NODESET)
+		{
+			int			count = 0;
+
+			if (xpathobj->nodesetval != NULL)
+				count = xpathobj->nodesetval->nodeNr;
+
+			if (xpathobj->nodesetval == NULL || count == 0)
+			{
+				/* case a: no node matched, the column value is NULL */
+				*isnull = true;
+			}
+			else if (count == 1 && typid == XMLOID)
+			{
+				text	   *textstr;
+
+				/* simple case, result is one value */
+				textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
+											   xtCxt->xmlerrcxt);
+				cstr = text_to_cstring(textstr);
+			}
+			else if (count == 1)
+			{
+				xmlChar    *str;
+
+				str = xmlNodeListGetString(xtCxt->doc,
+										   xpathobj->nodesetval->nodeTab[0]->xmlChildrenNode,
+										   1);
+
+				if (str != NULL)
+				{
+					/*
+					 * Copy into palloc'd memory; str itself must go back to
+					 * libxml2 with xmlFree() whether or not the copy
+					 * succeeds.
+					 */
+					PG_TRY();
+					{
+						cstr = pstrdup((char *) str);
+					}
+					PG_CATCH();
+					{
+						xmlFree(str);
+						PG_RE_THROW();
+					}
+					PG_END_TRY();
+					xmlFree(str);
+				}
+				else
+				{
+					/*
+					 * The matched node has no text content, as is the case
+					 * for an empty tag.  Map that to an empty string rather
+					 * than to NULL.  Note that this can also yield an empty
+					 * string where the user might not expect one, e.g. when
+					 * the empty node results from a text() function.
+					 */
+					cstr = "";
+				}
+			}
+			else
+			{
+				StringInfoData str;
+				int			i;
+
+				Assert(count > 1);
+
+				/*
+				 * When evaluating the XPath expression returns multiple
+				 * nodes, the result is the concatenation of them all.  The
+				 * target type must be XML.
+				 */
+				if (typid != XMLOID)
+					ereport(ERROR,
+							(errcode(ERRCODE_CARDINALITY_VIOLATION),
+							 errmsg("more than one value returned by column XPath expression")));
+
+				/* Concatenate serialized values */
+				initStringInfo(&str);
+				for (i = 0; i < count; i++)
+				{
+					appendStringInfoText(&str,
+										 xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
+															  xtCxt->xmlerrcxt));
+				}
+				cstr = str.data;
+			}
+		}
+		else if (xpathobj->type == XPATH_STRING)
+		{
+			/* Still owned by xpathobj; it is consumed below before the free */
+			cstr = (char *) xpathobj->stringval;
+		}
+		else
+			elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
+
+		/*
+		 * By here, either cstr contains the result value, or the isnull flag
+		 * has been set.
+		 */
+		Assert(cstr || *isnull);
+
+		/* Convert the string through the column type's input function */
+		if (!*isnull)
+			result = InputFunctionCall(&state->in_functions[colnum],
+									   cstr,
+									   state->typioparams[colnum],
+									   typmod);
+	}
+	PG_CATCH();
+	{
+		if (xpathobj != NULL)
+			xmlXPathFreeObject(xpathobj);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	xmlXPathFreeObject(xpathobj);
+
+	return result;
+#else
+	NO_XML_SUPPORT();
+	/* NO_XML_SUPPORT() raises ERROR; the return only keeps compilers quiet */
+	return 0;
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableDestroyOpaque
+ *		Release all libxml2 resources
+ *
+ * Frees every libxml2 object recorded in the builder state, ends the
+ * pg_xml_init() session started by XmlTableInitOpaque, and invalidates the
+ * state so that later calls are rejected.
+ */
+static void
+XmlTableDestroyOpaque(TableFuncScanState *state)
+{
+#ifdef USE_LIBXML
+	XmlTableBuilderData *xtCxt;
+
+	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
+
+	/* Propagate context related error context to libxml2 */
+	xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
+
+	/* Column filters: entries never set by SetColumnFilter are still NULL */
+	if (xtCxt->xpathscomp != NULL)
+	{
+		int			i;
+
+		for (i = 0; i < xtCxt->natts; i++)
+			if (xtCxt->xpathscomp[i] != NULL)
+				xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
+	}
+
+	/* Free the row-filter result and expression, then context, document, parser */
+	if (xtCxt->xpathobj != NULL)
+		xmlXPathFreeObject(xtCxt->xpathobj);
+	if (xtCxt->xpathcomp != NULL)
+		xmlXPathFreeCompExpr(xtCxt->xpathcomp);
+	if (xtCxt->xpathcxt != NULL)
+		xmlXPathFreeContext(xtCxt->xpathcxt);
+	if (xtCxt->doc != NULL)
+		xmlFreeDoc(xtCxt->doc);
+	if (xtCxt->ctxt != NULL)
+		xmlFreeParserCtxt(xtCxt->ctxt);
+
+	pg_xml_done(xtCxt->xmlerrcxt, true);
+
+	/* not valid anymore */
+	xtCxt->magic = 0;
+	state->opaque = NULL;
+
+#else
+	NO_XML_SUPPORT();
+#endif   /* not USE_LIBXML */
+}