aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/xml.c
diff options
context:
space:
mode:
authorPeter Eisentraut <peter_e@gmx.net>2006-12-21 16:05:16 +0000
committerPeter Eisentraut <peter_e@gmx.net>2006-12-21 16:05:16 +0000
commit8c1de5fb0010ae712568f1706b737270c3609bd8 (patch)
treebc328a654c41ea3eb1a9a27b76fd5215fb698608 /src/backend/utils/adt/xml.c
parented1e9cd501b4dc89a6a7e5cef702f2f6830ae829 (diff)
downloadpostgresql-8c1de5fb0010ae712568f1706b737270c3609bd8.tar.gz
postgresql-8c1de5fb0010ae712568f1706b737270c3609bd8.zip
Initial SQL/XML support: xml data type and initial set of functions.
Diffstat (limited to 'src/backend/utils/adt/xml.c')
-rw-r--r--src/backend/utils/adt/xml.c942
1 files changed, 942 insertions, 0 deletions
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
new file mode 100644
index 00000000000..8997730fc8d
--- /dev/null
+++ b/src/backend/utils/adt/xml.c
@@ -0,0 +1,942 @@
+/*-------------------------------------------------------------------------
+ *
+ * xml.c
+ * XML data type support.
+ *
+ *
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.1 2006/12/21 16:05:15 petere Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Generally, XML type support is only available when libxml use was
+ * configured during the build. But even if that is not done, the
+ * type and all the functions are available, but most of them will
+ * fail. For one thing, this avoids having to manage variant catalog
+ * installations. But it also has nice effects such as that you can
+ * dump a database containing XML type data even if the server is not
+ * linked with libxml.
+ */
+
+#include "postgres.h"
+
+#ifdef USE_LIBXML
+#include <libxml/chvalid.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/uri.h>
+#include <libxml/xmlerror.h>
+#endif /* USE_LIBXML */
+
+#include "fmgr.h"
+#include "mb/pg_wchar.h"
+#include "nodes/execnodes.h"
+#include "utils/builtins.h"
+#include "utils/xml.h"
+
+
+#ifdef USE_LIBXML
+
+/*
+ * A couple of useful macros (similar to ones from libxml/parse.c)
+ */
+#define CMP4( s, c1, c2, c3, c4 ) \
+ ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
+ ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
+#define CMP5( s, c1, c2, c3, c4, c5 ) \
+ ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
+
+#define PG_XML_DEFAULT_URI "dummy.xml"
+#define XML_ERRBUF_SIZE 200
+
+
+static void xml_init(void);
+static void *xml_palloc(size_t size);
+static void *xml_repalloc(void *ptr, size_t size);
+static void xml_pfree(void *ptr);
+static char *xml_pstrdup(const char *string);
+static void xml_ereport(int level, char *msg, void *ctxt);
+static void xml_errorHandler(void *ctxt, const char *msg, ...);
+static void xml_ereport_by_code(int level, char *msg, int errcode);
+static xmlChar *xml_text2xmlChar(text *in);
+static xmlDocPtr xml_parse(text *data, int opts, bool is_document);
+
+
+/* Global variables */
+/* taken from contrib/xml2 */
+/* FIXME: DO NOT USE global vars !!! */
+char *xml_errbuf; /* per line error buffer */
+char *xml_errmsg = NULL; /* overall error message */
+
+#endif /* USE_LIBXML */
+
+
+#define NO_XML_SUPPORT() ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("no XML support in this installation")))
+
+
+Datum
+xml_in(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ char *s = PG_GETARG_CSTRING(0);
+ size_t len;
+ xmltype *vardata;
+
+ len = strlen(s);
+ vardata = palloc(len + VARHDRSZ);
+ VARATT_SIZEP(vardata) = len + VARHDRSZ;
+ memcpy(VARDATA(vardata), s, len);
+
+ /*
+ * Parse the data to check if it is well-formed XML data. Assume
+ * that ERROR occurred if parsing failed. Do we need DTD
+ * validation (if DTD exists)?
+ */
+ xml_parse(vardata, XML_PARSE_DTDATTR | XML_PARSE_DTDVALID, false);
+
+ PG_RETURN_XML_P(vardata);
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+Datum
+xml_out(PG_FUNCTION_ARGS)
+{
+ xmltype *s = PG_GETARG_XML_P(0);
+ char *result;
+ int32 len;
+
+ len = VARSIZE(s) - VARHDRSZ;
+ result = palloc(len + 1);
+ memcpy(result, VARDATA(s), len);
+ result[len] = '\0';
+
+ PG_RETURN_CSTRING(result);
+}
+
+
+#ifdef USE_LIBXML
+static void
+appendStringInfoText(StringInfo str, const text *t)
+{
+ appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
+}
+
+
+static xmltype *
+stringinfo_to_xmltype(StringInfo buf)
+{
+ int32 len;
+ xmltype *result;
+
+ len = buf->len + VARHDRSZ;
+ result = palloc(len);
+ VARATT_SIZEP(result) = len;
+ memcpy(VARDATA(result), buf->data, buf->len);
+
+ return result;
+}
+#endif
+
+
+Datum
+xmlcomment(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *arg = PG_GETARG_TEXT_P(0);
+ int len = VARATT_SIZEP(arg) - VARHDRSZ;
+ StringInfoData buf;
+ int i;
+
+ /* check for "--" in string or "-" at the end */
+ for (i = 1; i < len; i++)
+ if ((VARDATA(arg)[i] == '-' && VARDATA(arg)[i - 1] == '-')
+ || (VARDATA(arg)[i] == '-' && i == len - 1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_XML_COMMENT),
+ errmsg("invalid XML comment")));
+
+ initStringInfo(&buf);
+ appendStringInfo(&buf, "<!--");
+ appendStringInfoText(&buf, arg);
+ appendStringInfo(&buf, "-->");
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+Datum
+xmlparse(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *data;
+ bool is_document;
+ bool preserve_whitespace;
+
+ data = PG_GETARG_TEXT_P(0);
+
+ if (PG_NARGS() >= 2)
+ is_document = PG_GETARG_BOOL(1);
+ else
+ is_document = false;
+
+ if (PG_NARGS() >= 3)
+ preserve_whitespace = PG_GETARG_BOOL(2);
+ else
+ /*
+ * Since the XMLPARSE grammar makes STRIP WHITESPACE the
+ * default, this argument should really default to false. But
+ * until we have actually implemented whitespace stripping,
+ * this would be annoying.
+ */
+ preserve_whitespace = true;
+
+ if (!preserve_whitespace)
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("XMLPARSE with STRIP WHITESPACE is not implemented")));
+
+ /*
+ * Note, that here we try to apply DTD defaults
+ * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d: 'Default
+ * valies defined by internal DTD are applied'. As for external
+ * DTDs, we try to support them too, (see SQL/XML:10.16.7.e)
+ */
+ xml_parse(data, XML_PARSE_DTDATTR, is_document); /* assume that ERROR occurred if parsing failed */
+
+ PG_RETURN_XML_P(data);
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+Datum
+xmlpi(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ char *target = NameStr(*PG_GETARG_NAME(0));
+ StringInfoData buf;
+
+ if (strlen(target) >= 3
+ && (target[0] == 'x' || target[0] == 'X')
+ && (target[1] == 'm' || target[1] == 'M')
+ && (target[2] == 'l' || target[2] == 'L'))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid XML processing instruction"),
+ errdetail("XML processing instruction target name cannot start with \"xml\".")));
+ }
+
+ initStringInfo(&buf);
+
+ appendStringInfo(&buf, "<?");
+ appendStringInfoString(&buf, map_sql_identifier_to_xml_name(target, false));
+ if (PG_NARGS() > 1)
+ {
+ text *arg = PG_GETARG_TEXT_P(1);
+ char *string;
+
+ string = DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(arg)));
+ if (strstr(string, "?>"))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
+ errmsg("invalid XML processing instruction"),
+ errdetail("XML processing instruction cannot contain \"?>\".")));
+
+ appendStringInfoString(&buf, " ");
+ appendStringInfoString(&buf, string);
+ }
+ appendStringInfoString(&buf, "?>");
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+Datum
+xmlroot(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ xmltype *data;
+ text *version;
+ int standalone;
+ StringInfoData buf;
+
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+ else
+ data = PG_GETARG_XML_P(0);
+
+ if (PG_ARGISNULL(1))
+ version = NULL;
+ else
+ version = PG_GETARG_TEXT_P(1);
+
+ if (PG_ARGISNULL(2))
+ standalone = 0;
+ else
+ {
+ bool tmp = PG_GETARG_BOOL(2);
+ standalone = (tmp ? 1 : -1);
+ }
+
+ /*
+ * FIXME: This is probably supposed to be cleverer if there
+ * already is an XML preamble.
+ */
+ initStringInfo(&buf);
+
+ appendStringInfo(&buf,"<?xml");
+ if (version) {
+ appendStringInfo(&buf, " version=\"");
+ appendStringInfoText(&buf, version);
+ appendStringInfo(&buf, "\"");
+ }
+ if (standalone)
+ appendStringInfo(&buf, " standalone=\"%s\"", (standalone == 1 ? "yes" : "no"));
+ appendStringInfo(&buf, "?>");
+ appendStringInfoText(&buf, (text *) data);
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+/*
+ * Validate document (given as string) against DTD (given as external link)
+ * TODO !!! use text instead of cstring for second arg
+ * TODO allow passing DTD as a string value (not only as an URI)
+ * TODO redesign (see comment with '!!!' below)
+ */
+Datum
+xmlvalidate(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *data = PG_GETARG_TEXT_P(0);
+ text *dtdOrUri = PG_GETARG_TEXT_P(1);
+ bool result = FALSE;
+ xmlParserCtxtPtr ctxt; /* the parser context */
+ xmlDocPtr doc; /* the resulting document tree */
+ xmlDtdPtr dtd;
+
+ xml_init();
+
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL)
+ xml_ereport(ERROR, "could not allocate parser context", ctxt);
+ doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
+ VARSIZE(data) - VARHDRSZ, PG_XML_DEFAULT_URI, NULL, 0);
+ if (doc == NULL)
+ xml_ereport(ERROR, "could not parse XML data", ctxt);
+
+#if 0
+ uri = xmlCreateURI();
+ ereport(NOTICE, (errcode(0),errmsg(" dtd - %s", dtdOrUri)));
+ dtd = palloc(sizeof(xmlDtdPtr));
+ uri = xmlParseURI(dtdOrUri);
+ if (uri == NULL)
+ xml_ereport(ERROR, "not implemented yet... (TODO)", ctxt);
+ else
+#endif
+ dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
+
+ if (dtd == NULL)
+ {
+#if 0
+ xmlFreeDoc(doc);
+ xmlFreeParserCtxt(ctxt);
+#endif
+ xml_ereport(ERROR, "could not load DTD", ctxt);
+ }
+
+ if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
+ result = TRUE;
+
+#if 0
+ xmlFreeURI(uri);
+ xmlFreeDtd(dtd);
+ xmlFreeDoc(doc);
+ xmlFreeParserCtxt(ctxt);
+ xmlCleanupParser();
+#endif
+
+ if (!result)
+ xml_ereport(NOTICE, "validation against DTD failed", ctxt);
+
+ PG_RETURN_BOOL(result);
+#else /* not USE_LIBXML */
+ NO_XML_SUPPORT();
+ return 0;
+#endif /* not USE_LIBXML */
+}
+
+
+#ifdef USE_LIBXML
+
+/*
+ * Container for some init stuff (not good design!)
+ * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and check)
+ */
+static void
+xml_init(void)
+{
+ /*
+ * Currently, we have no pure UTF-8 support for internals -- check
+ * if we can work.
+ */
+ if (sizeof (char) != sizeof (xmlChar))
+ ereport(ERROR,
+ (errmsg("cannot initialize XML library"),
+ errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
+ sizeof(char), sizeof(xmlChar))));
+
+ xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
+ xmlInitParser();
+ LIBXML_TEST_VERSION;
+ /* do not flood PG's logfile with libxml error messages - reset error handler*/
+ xmlSetGenericErrorFunc(NULL, xml_errorHandler);
+ xml_errmsg = NULL;
+ xml_errbuf = palloc(XML_ERRBUF_SIZE);
+ memset(xml_errbuf, 0, XML_ERRBUF_SIZE);
+}
+
+
+/*
+ * Convert a C string to XML internal representation
+ * (same things as for TEXT, but with checking the data for well-formedness
+ * and, moreover, validation against DTD, if needed).
+ * NOTICE: We use TEXT type as internal storage type. In the future,
+ * we plan to create own storage type (maybe several types/strategies)
+ * TODO predefined DTDs / XSDs and validation
+ * TODO validation against XML Schema
+ * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
+ * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
+ */
+static xmlDocPtr
+xml_parse(text *data, int opts, bool is_document)
+{
+ bool validationFailed = FALSE;
+ xmlParserCtxtPtr ctxt; /* the parser context */
+ xmlDocPtr doc; /* the resulting document tree */
+ int res_code;
+ int32 len;
+ xmlChar *string;
+#ifdef XML_DEBUG_DTD_CONST
+ xmlDtdPtr dtd; /* pointer to DTD */
+#endif
+
+ xml_init();
+
+ len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
+ string = xml_text2xmlChar(data);
+
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL)
+ xml_ereport(ERROR, "could not allocate parser context", ctxt);
+
+ /* first, we try to parse the string as it is XML doc, then, as XML chunk */
+ ereport(DEBUG3, (errmsg("string to parse: %s", string)));
+ if (len > 4 && CMP5(string, '<', '?', 'x', 'm', 'l'))
+ {
+ /* consider it as DOCUMENT */
+ doc = xmlCtxtReadMemory(ctxt, string, len, PG_XML_DEFAULT_URI, NULL, opts);
+ if (doc == NULL)
+ {
+ xml_ereport(ERROR, "could not parse XML data", ctxt);
+#if 0
+ xmlFreeParserCtxt(ctxt);
+ xmlCleanupParser();
+ ereport(ERROR, (errmsg("could not parse XML data")));
+#endif
+ }
+ }
+ else
+ {
+ /* attempt to parse the string as if it is an XML fragment */
+ ereport(DEBUG3, (errmsg("the string is not an XML doc, trying to parse as a CHUNK")));
+ doc = xmlNewDoc(NULL);
+ /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
+ res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL);
+ if (res_code != 0)
+ {
+ xmlFreeParserCtxt(ctxt);
+ xmlCleanupParser();
+ xml_ereport_by_code(ERROR, "could not parse XML data", res_code);
+ }
+ }
+
+#ifdef XML_DEBUG_DTD_CONST
+ dtd = xmlParseDTD(NULL, (xmlChar *) XML_DEBUG_DTD_CONST);
+ xml_ereport(DEBUG3, "solid path to DTD was defined for debugging purposes", ctxt);
+ if (dtd == NULL)
+ {
+ xml_ereport(ERROR, "could not parse DTD data", ctxt);
+ }
+ else
+#else
+ /* if dtd for our xml data is detected... */
+ if ((doc->intSubset != NULL) || (doc->extSubset != NULL))
+#endif
+ {
+ /* assume that inline DTD exists - validation should be performed */
+#ifdef XML_DEBUG_DTD_CONST
+ if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) != 1)
+#else
+ if (ctxt->valid == 0)
+#endif
+ {
+ /* DTD exists, but validator reported 'validation failed' */
+ validationFailed = TRUE;
+ }
+ }
+
+ if (validationFailed)
+ xml_ereport(WARNING, "validation against DTD failed", ctxt);
+
+ /* TODO encoding issues
+ * (thoughts:
+ * CASE:
+ * - XML data has explicit encoding attribute in its prolog
+ * - if not, assume that enc. of XML data is the same as client's one
+ *
+ * The common rule is to accept the XML data only if its encoding
+ * is the same as encoding of the storage (server's). The other possible
+ * option is to accept all the docs, but DO TRANSFORMATION and, if needed,
+ * change the prolog.
+ *
+ * I think I'd stick the first way (for the 1st version),
+ * it's much simplier (less errors...)
+ * ) */
+ /* ... */
+
+ xmlFreeParserCtxt(ctxt);
+ xmlCleanupParser();
+
+ ereport(DEBUG3, (errmsg("XML data successfully parsed, encoding: %s",
+ (char *) doc->encoding)));
+
+ return doc;
+}
+
+
+/*
+ * xmlChar<->text convertions
+ */
+static xmlChar *
+xml_text2xmlChar(text *in)
+{
+ int32 len = VARSIZE(in) - VARHDRSZ;
+ xmlChar *res;
+
+ res = palloc(len + 1);
+ memcpy(res, VARDATA(in), len);
+ res[len] = '\0';
+
+ return(res);
+}
+
+
+/*
+ * Wrappers for memory management functions
+ */
+static void *
+xml_palloc(size_t size)
+{
+ return palloc(size);
+}
+
+
+static void *
+xml_repalloc(void *ptr, size_t size)
+{
+ return repalloc(ptr, size);
+}
+
+
+static void
+xml_pfree(void *ptr)
+{
+ pfree(ptr);
+}
+
+
+static char *
+xml_pstrdup(const char *string)
+{
+ return pstrdup(string);
+}
+
+
+/*
+ * Wrapper for "ereport" function.
+ * Adds detail - libxml's native error message, if any.
+ */
+static void
+xml_ereport(int level, char *msg, void *ctxt)
+{
+ char *xmlErrDetail;
+ int xmlErrLen, i;
+ xmlErrorPtr libxmlErr = NULL;
+
+ if (xml_errmsg != NULL)
+ {
+ ereport(DEBUG1, (errmsg("%s", xml_errmsg)));
+ pfree(xml_errmsg);
+ }
+
+ if (ctxt != NULL)
+ libxmlErr = xmlCtxtGetLastError(ctxt);
+
+ if (libxmlErr == NULL)
+ {
+ if (level == ERROR)
+ {
+ xmlFreeParserCtxt(ctxt);
+ xmlCleanupParser();
+ }
+ ereport(level, (errmsg(msg)));
+ }
+ else
+ {
+ /* as usual, libxml error message contains '\n'; get rid of it */
+ xmlErrLen = strlen(libxmlErr->message); /* - 1; */
+ xmlErrDetail = (char *) palloc(xmlErrLen);
+ for (i = 0; i < xmlErrLen; i++)
+ {
+ if (libxmlErr->message[i] == '\n')
+ xmlErrDetail[i] = '.';
+ else
+ xmlErrDetail[i] = libxmlErr->message[i];
+ }
+ if (level == ERROR)
+ {
+ xmlFreeParserCtxt(ctxt);
+ xmlCleanupParser();
+ }
+ ereport(level, (errmsg(msg), errdetail("%s", xmlErrDetail)));
+ }
+}
+
+
+/*
+ * Error handler for libxml error messages
+ */
+static void
+xml_errorHandler(void *ctxt, const char *msg,...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ vsnprintf(xml_errbuf, XML_ERRBUF_SIZE, msg, args);
+ va_end(args);
+ /* Now copy the argument across */
+ if (xml_errmsg == NULL)
+ xml_errmsg = pstrdup(xml_errbuf);
+ else
+ {
+ int32 xsize = strlen(xml_errmsg);
+
+ xml_errmsg = repalloc(xml_errmsg, (size_t) (xsize + strlen(xml_errbuf) + 1));
+ strncpy(&xml_errmsg[xsize - 1], xml_errbuf, strlen(xml_errbuf));
+ xml_errmsg[xsize + strlen(xml_errbuf) - 1] = '\0';
+ }
+ memset(xml_errbuf, 0, XML_ERRBUF_SIZE);
+}
+
+
+/*
+ * Return error message by libxml error code
+ * TODO make them closer to recommendations from Postgres manual
+ */
+static void
+xml_ereport_by_code(int level, char *msg, int code)
+{
+ const char *det;
+
+ if (code < 0)
+ {
+ ereport(level, (errmsg(msg)));
+ return;
+ }
+
+ switch (code) {
+ case XML_ERR_INTERNAL_ERROR:
+ det = "libxml internal error";
+ break;
+ case XML_ERR_ENTITY_LOOP:
+ det = "Detected an entity reference loop";
+ break;
+ case XML_ERR_ENTITY_NOT_STARTED:
+ det = "EntityValue: \" or ' expected";
+ break;
+ case XML_ERR_ENTITY_NOT_FINISHED:
+ det = "EntityValue: \" or ' expected";
+ break;
+ case XML_ERR_ATTRIBUTE_NOT_STARTED:
+ det = "AttValue: \" or ' expected";
+ break;
+ case XML_ERR_LT_IN_ATTRIBUTE:
+ det = "Unescaped '<' not allowed in attributes values";
+ break;
+ case XML_ERR_LITERAL_NOT_STARTED:
+ det = "SystemLiteral \" or ' expected";
+ break;
+ case XML_ERR_LITERAL_NOT_FINISHED:
+ det = "Unfinished System or Public ID \" or ' expected";
+ break;
+ case XML_ERR_MISPLACED_CDATA_END:
+ det = "Sequence ']]>' not allowed in content";
+ break;
+ case XML_ERR_URI_REQUIRED:
+ det = "SYSTEM or PUBLIC, the URI is missing";
+ break;
+ case XML_ERR_PUBID_REQUIRED:
+ det = "PUBLIC, the Public Identifier is missing";
+ break;
+ case XML_ERR_HYPHEN_IN_COMMENT:
+ det = "Comment must not contain '--' (double-hyphen)";
+ break;
+ case XML_ERR_PI_NOT_STARTED:
+ det = "xmlParsePI : no target name";
+ break;
+ case XML_ERR_RESERVED_XML_NAME:
+ det = "Invalid PI name";
+ break;
+ case XML_ERR_NOTATION_NOT_STARTED:
+ det = "NOTATION: Name expected here";
+ break;
+ case XML_ERR_NOTATION_NOT_FINISHED:
+ det = "'>' required to close NOTATION declaration";
+ break;
+ case XML_ERR_VALUE_REQUIRED:
+ det = "Entity value required";
+ break;
+ case XML_ERR_URI_FRAGMENT:
+ det = "Fragment not allowed";
+ break;
+ case XML_ERR_ATTLIST_NOT_STARTED:
+ det = "'(' required to start ATTLIST enumeration";
+ break;
+ case XML_ERR_NMTOKEN_REQUIRED:
+ det = "NmToken expected in ATTLIST enumeration";
+ break;
+ case XML_ERR_ATTLIST_NOT_FINISHED:
+ det = "')' required to finish ATTLIST enumeration";
+ break;
+ case XML_ERR_MIXED_NOT_STARTED:
+ det = "MixedContentDecl : '|' or ')*' expected";
+ break;
+ case XML_ERR_PCDATA_REQUIRED:
+ det = "MixedContentDecl : '#PCDATA' expected";
+ break;
+ case XML_ERR_ELEMCONTENT_NOT_STARTED:
+ det = "ContentDecl : Name or '(' expected";
+ break;
+ case XML_ERR_ELEMCONTENT_NOT_FINISHED:
+ det = "ContentDecl : ',' '|' or ')' expected";
+ break;
+ case XML_ERR_PEREF_IN_INT_SUBSET:
+ det = "PEReference: forbidden within markup decl in internal subset";
+ break;
+ case XML_ERR_GT_REQUIRED:
+ det = "Expected '>'";
+ break;
+ case XML_ERR_CONDSEC_INVALID:
+ det = "XML conditional section '[' expected";
+ break;
+ case XML_ERR_EXT_SUBSET_NOT_FINISHED:
+ det = "Content error in the external subset";
+ break;
+ case XML_ERR_CONDSEC_INVALID_KEYWORD:
+ det = "conditional section INCLUDE or IGNORE keyword expected";
+ break;
+ case XML_ERR_CONDSEC_NOT_FINISHED:
+ det = "XML conditional section not closed";
+ break;
+ case XML_ERR_XMLDECL_NOT_STARTED:
+ det = "Text declaration '<?xml' required";
+ break;
+ case XML_ERR_XMLDECL_NOT_FINISHED:
+ det = "parsing XML declaration: '?>' expected";
+ break;
+ case XML_ERR_EXT_ENTITY_STANDALONE:
+ det = "external parsed entities cannot be standalone";
+ break;
+ case XML_ERR_ENTITYREF_SEMICOL_MISSING:
+ det = "EntityRef: expecting ';'";
+ break;
+ case XML_ERR_DOCTYPE_NOT_FINISHED:
+ det = "DOCTYPE improperly terminated";
+ break;
+ case XML_ERR_LTSLASH_REQUIRED:
+ det = "EndTag: '</' not found";
+ break;
+ case XML_ERR_EQUAL_REQUIRED:
+ det = "Expected '='";
+ break;
+ case XML_ERR_STRING_NOT_CLOSED:
+ det = "String not closed expecting \" or '";
+ break;
+ case XML_ERR_STRING_NOT_STARTED:
+ det = "String not started expecting ' or \"";
+ break;
+ case XML_ERR_ENCODING_NAME:
+ det = "Invalid XML encoding name";
+ break;
+ case XML_ERR_STANDALONE_VALUE:
+ det = "Standalone accepts only 'yes' or 'no'";
+ break;
+ case XML_ERR_DOCUMENT_EMPTY:
+ det = "Document is empty";
+ break;
+ case XML_ERR_DOCUMENT_END:
+ det = "Extra content at the end of the document";
+ break;
+ case XML_ERR_NOT_WELL_BALANCED:
+ det = "Chunk is not well balanced";
+ break;
+ case XML_ERR_EXTRA_CONTENT:
+ det = "Extra content at the end of well balanced chunk";
+ break;
+ case XML_ERR_VERSION_MISSING:
+ det = "Malformed declaration expecting version";
+ break;
+ /* more err codes... Please, keep the order! */
+ case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */
+ det ="Attribute without value";
+ break;
+ case XML_ERR_ATTRIBUTE_REDEFINED:
+ det ="Attribute defined more than once in the same element";
+ break;
+ case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */
+ det = "Comment is not finished";
+ break;
+ case XML_ERR_NAME_REQUIRED: /* 68 */
+ det = "Element name not found";
+ break;
+ case XML_ERR_TAG_NOT_FINISHED: /* 77 */
+ det = "Closing tag not found";
+ break;
+ default:
+ det = "Unregistered error (libxml error code: %d)";
+ ereport(DEBUG1, (errmsg("Check out \"libxml/xmlerror.h\" and bring errcode \"%d\" processing to \"xml.c\".", code)));
+ }
+
+ if (xml_errmsg != NULL)
+ {
+ ereport(DEBUG1, (errmsg("%s", xml_errmsg)));
+ pfree(xml_errmsg);
+ }
+
+ ereport(level, (errmsg(msg), errdetail(det, code)));
+}
+
+
+/*
+ * Convert one char in the current server encoding to a Unicode
+ * codepoint.
+ */
+static pg_wchar
+sqlchar_to_unicode(unsigned char *s)
+{
+ int save_enc;
+ pg_wchar ret;
+ char *utf8string = pg_do_encoding_conversion(s, pg_mblen(s), GetDatabaseEncoding(), PG_UTF8);
+
+ save_enc = GetDatabaseEncoding();
+ SetDatabaseEncoding(PG_UTF8);
+ pg_mb2wchar_with_len(utf8string, &ret, pg_mblen(s));
+ SetDatabaseEncoding(save_enc);
+
+ return ret;
+}
+
+
+static bool
+is_valid_xml_namefirst(pg_wchar c)
+{
+ /* (Letter | '_' | ':') */
+ return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
+ || c == '_' || c == ':');
+}
+
+
+static bool
+is_valid_xml_namechar(pg_wchar c)
+{
+ /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
+ return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
+ || xmlIsDigitQ(c)
+ || c == '.' || c == '-' || c == '_' || c == ':'
+ || xmlIsCombiningQ(c)
+ || xmlIsExtenderQ(c));
+}
+#endif /* USE_LIBXML */
+
+
+/*
+ * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
+ */
+char *
+map_sql_identifier_to_xml_name(unsigned char *ident, bool fully_escaped)
+{
+#ifdef USE_LIBXML
+ StringInfoData buf;
+ unsigned char *p;
+
+ initStringInfo(&buf);
+
+ for (p = ident; *p; p += pg_mblen(p))
+ {
+ if (*p == ':' && (p == ident || fully_escaped))
+ appendStringInfo(&buf, "_x003A_");
+ else if (*p == '_' && *(p+1) == 'x')
+ appendStringInfo(&buf, "_x005F_");
+ else if (fully_escaped && p == ident
+ && ( *p == 'x' || *p == 'X')
+ && ( *(p+1) == 'm' || *(p+1) == 'M')
+ && ( *(p+2) == 'l' || *(p+2) == 'L'))
+ {
+ if (*p == 'x')
+ appendStringInfo(&buf, "_x0078_");
+ else
+ appendStringInfo(&buf, "_x0058_");
+ }
+ else
+ {
+ pg_wchar u = sqlchar_to_unicode(p);
+
+ if (!is_valid_xml_namechar(u)
+ || (p == ident && !is_valid_xml_namefirst(u)))
+ appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
+ else
+ appendBinaryStringInfo(&buf, p, pg_mblen(p));
+ }
+ }
+
+ return buf.data;
+#else /* not USE_LIBXML */
+ NO_XML_SUPPORT();
+ return NULL;
+#endif /* not USE_LIBXML */
+}