diff options
Diffstat (limited to 'contrib/xml/pgxml.c')
-rw-r--r-- | contrib/xml/pgxml.c | 451 |
1 files changed, 244 insertions, 207 deletions
diff --git a/contrib/xml/pgxml.c b/contrib/xml/pgxml.c index 47289031579..3939250e25e 100644 --- a/contrib/xml/pgxml.c +++ b/contrib/xml/pgxml.c @@ -14,32 +14,39 @@ XML_Memory_Handling_Suite mhs; /* passthrough functions (palloc is a macro) */ -static void *pgxml_palloc(size_t size) +static void * +pgxml_palloc(size_t size) { - return palloc(size); + return palloc(size); } -static void *pgxml_repalloc(void *ptr, size_t size) +static void * +pgxml_repalloc(void *ptr, size_t size) { - return repalloc(ptr,size); + return repalloc(ptr, size); } -static void pgxml_pfree(void *ptr) +static void +pgxml_pfree(void *ptr) { - return pfree(ptr); + return pfree(ptr); } -static void pgxml_mhs_init() +static void +pgxml_mhs_init() { - mhs.malloc_fcn = pgxml_palloc; - mhs.realloc_fcn = pgxml_repalloc; - mhs.free_fcn = pgxml_pfree; + mhs.malloc_fcn = pgxml_palloc; + mhs.realloc_fcn = pgxml_repalloc; + mhs.free_fcn = pgxml_pfree; } -static void pgxml_handler_init() +static void +pgxml_handler_init() { - /* This code should set up the relevant handlers from user-supplied - settings. Quite how these settings are made is another matter :) */ + /* + * This code should set up the relevant handlers from user-supplied + * settings. Quite how these settings are made is another matter :) + */ } /* Returns true if document is well-formed */ @@ -49,31 +56,35 @@ PG_FUNCTION_INFO_V1(pgxml_parse); Datum pgxml_parse(PG_FUNCTION_ARGS) { - /* called as pgxml_parse(document) */ - XML_Parser p; - text *t = PG_GETARG_TEXT_P(0); /*document buffer */ - int32 docsize = VARSIZE(t) - VARHDRSZ; - - pgxml_mhs_init(); - - pgxml_handler_init(); - - p = XML_ParserCreate_MM(NULL,&mhs,NULL); - if (! p) { - elog(ERROR, "pgxml: Could not create expat parser"); - PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */ - } - - if (! XML_Parse(p, (char *)VARDATA(t) , docsize, 1)) { - /* elog(NOTICE, "Parse error at line %d:%s", - XML_GetCurrentLineNumber(p), - XML_ErrorString(XML_GetErrorCode(p))); */ - XML_ParserFree(p); - PG_RETURN_BOOL(false); - } - - XML_ParserFree(p); - PG_RETURN_BOOL(true); + /* called as pgxml_parse(document) */ + XML_Parser p; + text *t = PG_GETARG_TEXT_P(0); /* document buffer */ + int32 docsize = VARSIZE(t) - VARHDRSZ; + + pgxml_mhs_init(); + + pgxml_handler_init(); + + p = XML_ParserCreate_MM(NULL, &mhs, NULL); + if (!p) + { + elog(ERROR, "pgxml: Could not create expat parser"); + PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */ + } + + if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1)) + { + /* + * elog(NOTICE, "Parse error at line %d:%s", + * XML_GetCurrentLineNumber(p), + * XML_ErrorString(XML_GetErrorCode(p))); + */ + XML_ParserFree(p); + PG_RETURN_BOOL(false); + } + + XML_ParserFree(p); + PG_RETURN_BOOL(true); } /* XPath handling functions */ @@ -81,83 +92,88 @@ pgxml_parse(PG_FUNCTION_ARGS) /* XPath support here is for a very skeletal kind of XPath! It was easy to program though... */ -/* This first is the core function that builds a result set. The +/* This first is the core function that builds a result set. The actual functions called by the user manipulate that result set in various ways. */ -static XPath_Results *build_xpath_results(text *doc, text *pathstr) +static XPath_Results * +build_xpath_results(text *doc, text *pathstr) { - XPath_Results *xpr; - char *res; - pgxml_udata *udata; - XML_Parser p; - int32 docsize; + XPath_Results *xpr; + char *res; + pgxml_udata *udata; + XML_Parser p; + int32 docsize; - xpr = (XPath_Results *) palloc((sizeof(XPath_Results))); - memset((void *)xpr, 0, sizeof(XPath_Results)); - xpr->rescount=0; + xpr = (XPath_Results *) palloc((sizeof(XPath_Results))); + memset((void *) xpr, 0, sizeof(XPath_Results)); + xpr->rescount = 0; - docsize=VARSIZE(doc)-VARHDRSZ; + docsize = VARSIZE(doc) - VARHDRSZ; - /* res isn't going to be the real return type, it is just a buffer */ + /* res isn't going to be the real return type, it is just a buffer */ - res = (char *) palloc(docsize); - memset((void *)res, 0, docsize); + res = (char *) palloc(docsize); + memset((void *) res, 0, docsize); - xpr->resbuf = res; + xpr->resbuf = res; - udata = (pgxml_udata *) palloc((sizeof(pgxml_udata))); - memset((void *)udata,0,sizeof(pgxml_udata)); + udata = (pgxml_udata *) palloc((sizeof(pgxml_udata))); + memset((void *) udata, 0, sizeof(pgxml_udata)); - udata->currentpath[0]='\0'; - udata->textgrab=0; + udata->currentpath[0] = '\0'; + udata->textgrab = 0; - udata->path= (char *) palloc(VARSIZE(pathstr)); - memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr)-VARHDRSZ); + udata->path = (char *) palloc(VARSIZE(pathstr)); + memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ); - udata->path[VARSIZE(pathstr)-VARHDRSZ]='\0'; + udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0'; - udata->resptr = res; - udata->reslen = 0; + udata->resptr = res; + udata->reslen = 0; - udata->xpres = xpr; + udata->xpres = xpr; - /* Now fire up the parser */ - pgxml_mhs_init(); + /* Now fire up the parser */ + pgxml_mhs_init(); - p = XML_ParserCreate_MM(NULL,&mhs,NULL); - if (! p) { - elog(ERROR, "pgxml: Could not create expat parser"); - pfree(xpr); - pfree(udata->path); - pfree(udata); - pfree(res); - return NULL; - } - XML_SetUserData(p, (void *)udata); - - /* Set the handlers */ - - XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler); - XML_SetCharacterDataHandler(p, pgxml_charhandler); - - if (! XML_Parse(p, (char *)VARDATA(doc) , docsize, 1)) { - /* elog(NOTICE, "Parse error at line %d:%s", - XML_GetCurrentLineNumber(p), - XML_ErrorString(XML_GetErrorCode(p))); */ - XML_ParserFree(p); - pfree(xpr); - pfree(udata->path); - pfree(udata); - - return NULL; - } + p = XML_ParserCreate_MM(NULL, &mhs, NULL); + if (!p) + { + elog(ERROR, "pgxml: Could not create expat parser"); + pfree(xpr); + pfree(udata->path); + pfree(udata); + pfree(res); + return NULL; + } + XML_SetUserData(p, (void *) udata); + + /* Set the handlers */ + + XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler); + XML_SetCharacterDataHandler(p, pgxml_charhandler); + + if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1)) + { + /* + * elog(NOTICE, "Parse error at line %d:%s", + * XML_GetCurrentLineNumber(p), + * XML_ErrorString(XML_GetErrorCode(p))); + */ + XML_ParserFree(p); + pfree(xpr); + pfree(udata->path); + pfree(udata); + + return NULL; + } - pfree(udata->path); - pfree(udata); - XML_ParserFree(p); - return xpr; + pfree(udata->path); + pfree(udata); + XML_ParserFree(p); + return xpr; } @@ -166,145 +182,166 @@ PG_FUNCTION_INFO_V1(pgxml_xpath); Datum pgxml_xpath(PG_FUNCTION_ARGS) { - /* called as pgxml_xpath(document,pathstr, index) for the moment*/ + /* called as pgxml_xpath(document,pathstr, index) for the moment */ + + XPath_Results *xpresults; + text *restext; - XPath_Results *xpresults; - text *restext; + text *t = PG_GETARG_TEXT_P(0); /* document buffer */ + text *t2 = PG_GETARG_TEXT_P(1); + int32 ind = PG_GETARG_INT32(2) - 1; - text *t = PG_GETARG_TEXT_P(0); /*document buffer */ - text *t2= PG_GETARG_TEXT_P(1); - int32 ind = PG_GETARG_INT32(2) - 1; + xpresults = build_xpath_results(t, t2); - xpresults = build_xpath_results(t,t2); + /* + * This needs to be changed depending on the mechanism for returning + * our set of results. + */ - /* This needs to be changed depending on the mechanism for returning - our set of results. */ + if (xpresults == NULL) /* parse error (not WF or parser failure) */ + PG_RETURN_NULL(); - if (xpresults==NULL) /*parse error (not WF or parser failure) */ - { - PG_RETURN_NULL(); - } + if (ind >= (xpresults->rescount)) + PG_RETURN_NULL(); - if (ind >= (xpresults->rescount)) - { - PG_RETURN_NULL(); - } - - restext = (text *) palloc(xpresults->reslens[ind]+VARHDRSZ); - memcpy(VARDATA(restext),xpresults->results[ind],xpresults->reslens[ind]); + restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ); + memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]); - VARATT_SIZEP(restext) = xpresults->reslens[ind]+VARHDRSZ; + VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ; - pfree(xpresults->resbuf); - pfree(xpresults); + pfree(xpresults->resbuf); + pfree(xpresults); - PG_RETURN_TEXT_P(restext); + PG_RETURN_TEXT_P(restext); } -static void pgxml_pathcompare(void *userData) +static void +pgxml_pathcompare(void *userData) { - char *matchpos; - - matchpos=strstr(UD->currentpath, UD->path); - - if (matchpos == NULL) { /* Should we have more logic here ? */ - if (UD->textgrab) { - UD->textgrab=0; - pgxml_finalisegrabbedtext(userData); - } - return; - } - /* OK, we have a match of some sort. Now we need to check that - our match is anchored to the *end* of the string AND - that it is immediately preceded by a '/'*/ - /* This test wouldn't work if strlen (UD->path) overran the length - of the currentpath, but that's not possible because we got a match! */ - - if ((matchpos + strlen(UD->path))[0]=='\0') - { - if ((UD->path)[0]=='/') { - if (matchpos == UD->currentpath) { - UD->textgrab=1; + char *matchpos; + + matchpos = strstr(UD->currentpath, UD->path); + + if (matchpos == NULL) + { /* Should we have more logic here ? */ + if (UD->textgrab) + { + UD->textgrab = 0; + pgxml_finalisegrabbedtext(userData); + } + return; } - } else { - if ((matchpos-1)[0]=='/') { - UD->textgrab=1; + + /* + * OK, we have a match of some sort. Now we need to check that our + * match is anchored to the *end* of the string AND that it is + * immediately preceded by a '/' + */ + + /* + * This test wouldn't work if strlen (UD->path) overran the length of + * the currentpath, but that's not possible because we got a match! + */ + + if ((matchpos + strlen(UD->path))[0] == '\0') + { + if ((UD->path)[0] == '/') + { + if (matchpos == UD->currentpath) + UD->textgrab = 1; + } + else + { + if ((matchpos - 1)[0] == '/') + UD->textgrab = 1; + } } - } - } } -static void pgxml_starthandler(void *userData, const XML_Char *name, - const XML_Char **atts) +static void +pgxml_starthandler(void *userData, const XML_Char * name, + const XML_Char ** atts) { - char sepstr[]="/"; - - if ((strlen(name)+strlen(UD->currentpath))>MAXPATHLENGTH-2) { - elog(NOTICE,"Path too long"); - } else { - strncat(UD->currentpath,sepstr,1); - strcat(UD->currentpath, name); - } - if (UD->textgrab) - { - /* Depending on user preference, should we "reconstitute" - the element into the result text? - */ - } else { - pgxml_pathcompare(userData); - } + char sepstr[] = "/"; + + if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2) + elog(NOTICE, "Path too long"); + else + { + strncat(UD->currentpath, sepstr, 1); + strcat(UD->currentpath, name); + } + if (UD->textgrab) + { + /* + * Depending on user preference, should we "reconstitute" the + * element into the result text? + */ + } + else + pgxml_pathcompare(userData); } -static void pgxml_endhandler(void *userData, const XML_Char *name) +static void +pgxml_endhandler(void *userData, const XML_Char * name) { - /* Start by removing the current element off the end of the - currentpath */ - - char *sepptr; - - sepptr=strrchr(UD->currentpath,'/'); - if (sepptr==NULL) { - elog(ERROR,"There's a problem..."); - sepptr=UD->currentpath; - } - if (strcmp(name, sepptr+1) !=0) { - elog(NOTICE,"Wanted [%s], got [%s]",sepptr,name); - /* unmatched entry, so do nothing */ - } else { - sepptr[0]='\0'; /* Chop that element off the end */ - } - - if (UD->textgrab) { - pgxml_pathcompare(userData); - } + /* + * Start by removing the current element off the end of the + * currentpath + */ + + char *sepptr; + + sepptr = strrchr(UD->currentpath, '/'); + if (sepptr == NULL) + { + elog(ERROR, "There's a problem..."); + sepptr = UD->currentpath; + } + if (strcmp(name, sepptr + 1) != 0) + { + elog(NOTICE, "Wanted [%s], got [%s]", sepptr, name); + /* unmatched entry, so do nothing */ + } + else + { + sepptr[0] = '\0'; /* Chop that element off the end */ + } + + if (UD->textgrab) + pgxml_pathcompare(userData); } -static void pgxml_charhandler(void *userData, const XML_Char *s, int len) +static void +pgxml_charhandler(void *userData, const XML_Char * s, int len) { - if (UD->textgrab) { - if (len>0) { - memcpy(UD->resptr,s,len); - UD->resptr += len; - UD->reslen += len; - } - } + if (UD->textgrab) + { + if (len > 0) + { + memcpy(UD->resptr, s, len); + UD->resptr += len; + UD->reslen += len; + } + } } + /* Should I be using PG list types here? */ -static void pgxml_finalisegrabbedtext(void *userData) +static void +pgxml_finalisegrabbedtext(void *userData) { - /* In res/reslen, we have a single result. */ - UD->xpres->results[UD->xpres->rescount]= UD->resptr - UD->reslen; - UD->xpres->reslens[UD->xpres->rescount]= UD->reslen; - UD->reslen=0; - UD->xpres->rescount++; - - /* This effectively concatenates all the results together but we - do know where one ends and the next begins */ + /* In res/reslen, we have a single result. */ + UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen; + UD->xpres->reslens[UD->xpres->rescount] = UD->reslen; + UD->reslen = 0; + UD->xpres->rescount++; + + /* + * This effectively concatenates all the results together but we do + * know where one ends and the next begins + */ } - - - |