aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/xml/pgxml_dom.c238
-rw-r--r--contrib/xml/pgxml_dom.source7
2 files changed, 245 insertions, 0 deletions
diff --git a/contrib/xml/pgxml_dom.c b/contrib/xml/pgxml_dom.c
new file mode 100644
index 00000000000..b01368a52ce
--- /dev/null
+++ b/contrib/xml/pgxml_dom.c
@@ -0,0 +1,238 @@
+/* Parser interface for DOM-based parser (libxml) rather than
+ stream-based SAX-type parser */
+
+#include "postgres.h"
+#include "fmgr.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+
+/* declarations */
+
+/* Function-shaped wrappers around the backend allocator; these are the
+   callbacks handed to xmlMemSetup() in pgxml_parser_init(). */
+static void *pgxml_palloc(size_t size);
+static void *pgxml_repalloc(void *ptr, size_t size);
+static void pgxml_pfree(void *ptr);
+static char *pgxml_pstrdup(const char *string);
+
+/* Takes no arguments; spelled (void) so the declaration is a real prototype. */
+static void pgxml_parser_init(void);
+
+/* Writes the nodes of an XPath node set, optionally wrapped in tags,
+   into an xmlChar string. */
+static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
+    xmlChar *toptagname, xmlChar *septagname,
+    int format);
+
+/* Copies the contents of a text value into a NUL-terminated xmlChar string. */
+static xmlChar *pgxml_texttoxmlchar(text *textstring);
+
+
+/* SQL-callable entry points (registered with PG_FUNCTION_INFO_V1 below). */
+Datum pgxml_parse(PG_FUNCTION_ARGS);
+Datum pgxml_xpath(PG_FUNCTION_ARGS);
+
+/* memory handling passthrough functions (e.g. palloc, pstrdup are
+ currently macros, and the others might become so...) */
+
+/* palloc() exposed as an ordinary function so that its address can be
+   passed to xmlMemSetup(). */
+static void *
+pgxml_palloc(size_t size)
+{
+    void *chunk = palloc(size);
+
+    return chunk;
+}
+
+/* repalloc() exposed as an ordinary function so that its address can be
+   passed to xmlMemSetup(). */
+static void *
+pgxml_repalloc(void *ptr, size_t size)
+{
+    void *resized = repalloc(ptr, size);
+
+    return resized;
+}
+
+/* pfree() exposed as an ordinary function so that its address can be
+   passed to xmlMemSetup().  Nothing is returned: a void function must
+   not use "return <expression>;". */
+static void
+pgxml_pfree(void *ptr)
+{
+    pfree(ptr);
+}
+
+/* pstrdup() exposed as an ordinary function so that its address can be
+   passed to xmlMemSetup(). */
+static char *
+pgxml_pstrdup(const char *string)
+{
+    char *duplicate = pstrdup(string);
+
+    return duplicate;
+}
+
+/*
+ * Prepare libxml for use: install the palloc-based wrappers defined above
+ * as libxml's free/malloc/realloc/strdup callbacks, then initialise the
+ * parser.  Called at the start of each SQL-callable function in this file.
+ */
+static void
+pgxml_parser_init(void)
+{
+    /* This code should also set parser settings from user-supplied
+       info. Quite how these settings are made is another matter :) */
+
+    /* Argument order is free, malloc, realloc, strdup. */
+    xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
+    xmlInitParser();
+}
+
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+/*
+ * pgxml_parse(document)
+ *
+ * Hands the bytes of the text argument to xmlParseMemory() and returns
+ * true when a document tree comes back, false otherwise (i.e. the
+ * document is not well-formed).  The tree is only needed for the test
+ * and is released before returning.
+ */
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+    text *document = PG_GETARG_TEXT_P(0);           /* document buffer */
+    int32 nbytes = VARSIZE(document) - VARHDRSZ;    /* payload length, header excluded */
+    xmlDocPtr parsed;
+
+    pgxml_parser_init();
+
+    parsed = xmlParseMemory((char *) VARDATA(document), nbytes);
+    if (parsed != NULL)
+    {
+        xmlFreeDoc(parsed);
+        PG_RETURN_BOOL(true);
+    }
+
+    PG_RETURN_BOOL(false);
+}
+
+/*
+ * Render an XPath node set as text.
+ *
+ * Every node in the set is written with xmlNodeDump() into a scratch
+ * xmlBuffer.  When septagname is non-NULL and non-empty, each node is
+ * enclosed in <septagname>...</septagname>; when toptagname is non-NULL
+ * and non-empty, the complete output is enclosed in
+ * <toptagname>...</toptagname>.
+ *
+ * format: any non-zero value appends a newline after each node; the
+ * value 2 is additionally passed on as the final (format) argument of
+ * xmlNodeDump().
+ *
+ * A NULL nodeset contributes no nodes, so only the optional top-level
+ * tags are emitted.  The return value is an xmlStrdup() copy of the
+ * buffer content; the scratch buffer itself is freed here.
+ */
+static xmlChar *
+pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
+    xmlDocPtr doc,
+    xmlChar *toptagname,
+    xmlChar *septagname,
+    int format)
+{
+    int wrap_all = (toptagname != NULL) && (xmlStrlen(toptagname) > 0);
+    int wrap_each = (septagname != NULL) && (xmlStrlen(septagname) > 0);
+    int node_count = (nodeset != NULL) ? nodeset->nodeNr : 0;
+    xmlBufferPtr out = xmlBufferCreate();
+    xmlChar *rendered;
+    int idx;
+
+    if (wrap_all)
+    {
+        xmlBufferWriteChar(out, "<");
+        xmlBufferWriteCHAR(out, toptagname);
+        xmlBufferWriteChar(out, ">");
+    }
+
+    for (idx = 0; idx < node_count; idx++)
+    {
+        if (wrap_each)
+        {
+            xmlBufferWriteChar(out, "<");
+            xmlBufferWriteCHAR(out, septagname);
+            xmlBufferWriteChar(out, ">");
+        }
+
+        xmlNodeDump(out, doc, nodeset->nodeTab[idx], 1, (format == 2));
+
+        if (wrap_each)
+        {
+            xmlBufferWriteChar(out, "</");
+            xmlBufferWriteCHAR(out, septagname);
+            xmlBufferWriteChar(out, ">");
+        }
+
+        if (format != 0)
+            xmlBufferWriteChar(out, "\n");
+    }
+
+    if (wrap_all)
+    {
+        xmlBufferWriteChar(out, "</");
+        xmlBufferWriteCHAR(out, toptagname);
+        xmlBufferWriteChar(out, ">");
+    }
+
+    /* Copy the text out before the buffer that owns it is released. */
+    rendered = xmlStrdup(out->content);
+    xmlBufferFree(out);
+
+    return rendered;
+}
+
+/*
+ * Copy the payload of a text value into a freshly palloc'd xmlChar
+ * buffer and append a terminating NUL, since the text payload itself
+ * carries only a length.  The result can be released with pfree().
+ */
+static xmlChar *
+pgxml_texttoxmlchar(text *textstring)
+{
+    int32 payload_len = VARSIZE(textstring) - VARHDRSZ;
+    xmlChar *copy = (xmlChar *) palloc(payload_len + 1);
+
+    memcpy((char *) copy, VARDATA(textstring), payload_len);
+    copy[payload_len] = '\0';
+
+    return copy;
+}
+
+
+PG_FUNCTION_INFO_V1(pgxml_xpath);
+
+/*
+ * pgxml_xpath(document, xpath, toptag, septag)
+ *
+ * Parses the document, evaluates the XPath expression with the document's
+ * root element as the context node, and returns the result as text:
+ *   - a node set is serialised by pgxmlNodeSetToText() using toptag and
+ *     septag as the wrapping tags (format 0);
+ *   - a string result is returned as is;
+ *   - any other result type raises a NOTICE and yields "<unsupported/>".
+ *
+ * Returns SQL NULL when the document is not well-formed, when no XPath
+ * context can be created, when the expression does not compile (a NOTICE
+ * is raised), when evaluation produces no result object, or when no
+ * result string could be produced.
+ *
+ * Every exit goes through the cleanup label so that the document, XPath
+ * context, XPath result object and the argument copies are released on
+ * all paths.
+ */
+Datum
+pgxml_xpath(PG_FUNCTION_ARGS)
+{
+    xmlDocPtr doctree = NULL;
+    xmlXPathContextPtr ctxt = NULL;
+    xmlXPathObjectPtr res = NULL;
+    xmlXPathCompExprPtr comppath;
+    xmlChar *xpath, *toptag, *septag;
+    xmlChar *xpresstr = NULL;
+
+    int32 docsize, ressize;
+    text *t;
+    text *xpres = NULL;     /* stays NULL on every failure path */
+
+    t = PG_GETARG_TEXT_P(0);    /* document buffer */
+    xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));   /* XPath expression */
+    toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
+    septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
+
+    docsize = VARSIZE(t) - VARHDRSZ;
+
+    pgxml_parser_init();
+
+    doctree = xmlParseMemory((char *) VARDATA(t), docsize);
+    if (doctree == NULL)
+        goto cleanup;       /* not well-formed */
+
+    ctxt = xmlXPathNewContext(doctree);
+    if (ctxt == NULL)
+        goto cleanup;
+    ctxt->node = xmlDocGetRootElement(doctree);
+
+    /* compile the path */
+    comppath = xmlXPathCompile(xpath);
+    if (comppath == NULL)
+    {
+        elog(NOTICE, "XPath syntax error");
+        goto cleanup;
+    }
+
+    /* Now evaluate the path expression. */
+    res = xmlXPathCompiledEval(comppath, ctxt);
+    xmlXPathFreeCompExpr(comppath);
+    if (res == NULL)
+        goto cleanup;       /* NULL result seems appropriate */
+
+    /* The textual form depends on the type of the XPath result. */
+    switch (res->type)
+    {
+        case XPATH_NODESET:
+            xpresstr = pgxmlNodeSetToText(res->nodesetval,
+                                          doctree,
+                                          toptag, septag, 0);
+            break;
+        case XPATH_STRING:
+            xpresstr = xmlStrdup(res->stringval);
+            break;
+        default:
+            elog(NOTICE, "Unsupported XQuery result: %d", res->type);
+            xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
+            break;
+    }
+
+    /* xmlStrdup() hands back NULL for a NULL input; treat that as no result. */
+    if (xpresstr == NULL)
+        goto cleanup;
+
+    /* Now convert this result back to text */
+    ressize = xmlStrlen(xpresstr);
+    xpres = (text *) palloc(ressize + VARHDRSZ);
+    memcpy(VARDATA(xpres), xpresstr, ressize);
+    VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
+
+cleanup:
+    /* Release the XPath result and context before the document they refer to. */
+    if (xpresstr != NULL)
+        xmlFree(xpresstr);
+    if (res != NULL)
+        xmlXPathFreeObject(res);
+    if (ctxt != NULL)
+        xmlXPathFreeContext(ctxt);
+    if (doctree != NULL)
+        xmlFreeDoc(doctree);
+    pfree((void *) septag);
+    pfree((void *) toptag);
+    pfree((void *) xpath);
+
+    if (xpres == NULL)
+        PG_RETURN_NULL();
+
+    PG_RETURN_TEXT_P(xpres);
+}
+
+
+
+
diff --git a/contrib/xml/pgxml_dom.source b/contrib/xml/pgxml_dom.source
new file mode 100644
index 00000000000..a8582398b1e
--- /dev/null
+++ b/contrib/xml/pgxml_dom.source
@@ -0,0 +1,7 @@
+--SQL for XML parser
+
+CREATE FUNCTION pgxml_parse(text) RETURNS bool -- argument: document text; the C function returns true when it parses as well-formed XML
+ AS '_OBJWD_/pgxml_dom_DLSUFFIX_' LANGUAGE 'c' WITH (isStrict);
+
+CREATE FUNCTION pgxml_xpath(text,text,text,text) RETURNS text -- arguments: document, XPath expression, top-level tag, per-node tag
+ AS '_OBJWD_/pgxml_dom_DLSUFFIX_' LANGUAGE 'c' WITH (isStrict);
\ No newline at end of file