diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out
index 3d97b14c3a1e..0a8a62802030 100644
--- a/contrib/xml2/expected/xml2.out
+++ b/contrib/xml2/expected/xml2.out
@@ -261,3 +261,33 @@ $$
 $$);
 ERROR:  failed to apply stylesheet
+DETAIL:  runtime error: file SQL line 7 element output
+File write for 0wn3d.txt refused
+runtime error: file SQL line 7 element output
+xsltDocumentElem: write rights for 0wn3d.txt denied
+-- detecting missing stylesheet parameter
+SELECT xslt_process('',
+$$
+
+$$)::xml;
+ERROR:  failed to apply stylesheet
+DETAIL:  runtime error: file SQL line 3 element value-of
+Variable 'n1' has not been declared.
+Undefined variable
+runtime error: file SQL line 3 element value-of
+XPath evaluation returned no result.
+-- xmltype and Array-based signature
+SELECT xslt_process(xmlelement(name xml),
+$$
+
+$$::xml, ARRAY['n1','"foo"']);
+ xslt_process 
+--------------
+ foo         +
+ 
+(1 row)
+
diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql
index ef99d164f272..7555854d494f 100644
--- a/contrib/xml2/sql/xml2.sql
+++ b/contrib/xml2/sql/xml2.sql
@@ -153,3 +153,19 @@ $$
 $$);
+
+-- detecting missing stylesheet parameter
+SELECT xslt_process('',
+$$
+
+$$)::xml;
+
+-- xmltype and Array-based signature
+SELECT xslt_process(xmlelement(name xml),
+$$
+
+$$::xml, ARRAY['n1','"foo"']);
diff --git a/contrib/xml2/xml2--1.1.sql b/contrib/xml2/xml2--1.1.sql
index 671372cb2711..a579a1e5e187 100644
--- a/contrib/xml2/xml2--1.1.sql
+++ b/contrib/xml2/xml2--1.1.sql
@@ -71,3 +71,14 @@ CREATE FUNCTION xslt_process(text,text)
 RETURNS text
 AS 'MODULE_PATHNAME'
 LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+CREATE FUNCTION xslt_process(xml,xml,text[])
+RETURNS xml
+AS 'MODULE_PATHNAME','xslt_process_xmltype'
+LANGUAGE C STRICT VOLATILE PARALLEL SAFE;
+
+-- the function checks for the correct argument count
+CREATE FUNCTION xslt_process(xml,xml)
+RETURNS xml
+AS 'MODULE_PATHNAME','xslt_process_xmltype'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c
index 53550c7dc240..074952cf8bca 100644
--- a/contrib/xml2/xslt_proc.c
+++ b/contrib/xml2/xslt_proc.c
@@ -10,6 +10,9 @@
 #include "fmgr.h"
 #include "utils/builtins.h"
 #include "utils/xml.h"
+#include "utils/array.h"
+#include "utils/memutils.h"
+#include "mb/pg_wchar.h"
 
 #ifdef USE_LIBXSLT
 
@@ -35,7 +38,22 @@
 extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
 
 /* local defs */
+static xmltype *xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params);
 static const char **parse_params(text *paramstr);
+
+/*
+ * Declaration to come from src/backend/utils/adt/xml.c.  Keep it inside the
+ * USE_LIBXSLT section: it mentions libxml2 types, and it is only needed by
+ * the XSLT code below.
+ *
+ * FIXME: This cannot easily be exposed in xml.h.
+ * Perhaps there should be an xml-internal.h?
+ */
+extern xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
+						   bool preserve_whitespace, int encoding,
+						   XmlOptionType *parsed_xmloptiontype,
+						   xmlNodePtr *parsed_nodes,
+						   Node *escontext);
 #endif							/* USE_LIBXSLT */
 
 
@@ -48,9 +66,123 @@ xslt_process(PG_FUNCTION_ARGS)
 
 	text	   *doct = PG_GETARG_TEXT_PP(0);
 	text	   *ssheet = PG_GETARG_TEXT_PP(1);
-	text	   *volatile result = NULL;
-	text	   *paramstr;
-	const char **params;
+	const char **params = NULL;
+	text	   *result;
+
+	if (fcinfo->nargs == 3)
+	{
+		text	   *paramstr = PG_GETARG_TEXT_PP(2);
+
+		params = parse_params(paramstr);
+	}
+
+	result = xslt_process_internal(doct, ssheet, params);
+
+	PG_RETURN_TEXT_P(result);
+
+#else							/* !USE_LIBXSLT */
+
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("xslt_process() is not available without libxslt")));
+	PG_RETURN_NULL();
+
+#endif							/* USE_LIBXSLT */
+}
+
+/*
+ * xslt_process_xmltype
+ *		Variant of xslt_process() taking the document and the stylesheet as
+ *		xml, and the optional stylesheet parameters as a text array.
+ */
+PG_FUNCTION_INFO_V1(xslt_process_xmltype);
+
+Datum
+xslt_process_xmltype(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXSLT
+
+	xmltype    *doct = PG_GETARG_XML_P(0);
+	xmltype    *ssheet = PG_GETARG_XML_P(1);
+	const char **params = NULL;
+	xmltype    *result;
+
+	/*
+	 * Parameters are key-value pairs. The values are XPath expressions, so
+	 * strings will have to be escaped with single or double quotes. Even
+	 * `xsltproc --stringparam` does nothing else than adding single or double
+	 * quotes and fails if the value contains both.
+	 */
+	if (fcinfo->nargs == 3)
+	{
+		ArrayType  *paramarray = PG_GETARG_ARRAYTYPE_P(2);
+		Datum	   *arr_datums;
+		bool	   *arr_nulls;
+		int			arr_count;
+		int			i;
+
+		deconstruct_array_builtin(paramarray, TEXTOID, &arr_datums, &arr_nulls, &arr_count);
+
+		if ((arr_count % 2) != 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
+					 errmsg("number of stylesheet parameters (%d) must be a multiple of 2",
+							arr_count)));
+
+		params = palloc_array(const char *, arr_count + 1);
+
+		for (i = 0; i < arr_count; i++)
+		{
+			char	   *cstr;
+
+			/*
+			 * Names and values are matched up by position, so a NULL
+			 * element must not be skipped: that would shift every later
+			 * element and silently pair names with the wrong values.
+			 */
+			if (arr_nulls[i])
+				ereport(ERROR,
+						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+						 errmsg("stylesheet parameters must not be null")));
+
+			cstr = TextDatumGetCString(arr_datums[i]);
+			params[i] = (char *) pg_do_encoding_conversion((unsigned char *) cstr,
+														   strlen(cstr),
+														   GetDatabaseEncoding(),
+														   PG_UTF8);
+		}
+		params[arr_count] = NULL;
+	}
+
+	result = xslt_process_internal(doct, ssheet, params);
+
+	PG_RETURN_XML_P(result);
+
+#else							/* !USE_LIBXSLT */
+
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("xslt_process() is not available without libxslt")));
+	PG_RETURN_NULL();
+
+#endif							/* USE_LIBXSLT */
+}
+
+#ifdef USE_LIBXSLT
+
+/*
+ * xslt_process_internal
+ *		Apply the stylesheet "ssheet" to the document "doct" and return the
+ *		serialized result.
+ *
+ * "params" is either NULL (no parameters) or a NULL-terminated array of
+ * alternating parameter names and values.  Failures are reported with
+ * ereport(ERROR); messages emitted through libxslt's generic error function
+ * are collected in the error context while this function runs.
+ */
+static xmltype *
+xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params)
+{
+	text	   *volatile result = NULL;
 	PgXmlErrorContext *xmlerrcxt;
 	volatile xsltStylesheetPtr stylesheet = NULL;
 	volatile xmlDocPtr doctree = NULL;
@@ -60,20 +192,19 @@ xslt_process(PG_FUNCTION_ARGS)
 	volatile int resstat = -1;
 	xmlChar    *volatile resstr = NULL;
 
-	if (fcinfo->nargs == 3)
-	{
-		paramstr = PG_GETARG_TEXT_PP(2);
-		params = parse_params(paramstr);
-	}
-	else
-	{
-		/* No parameters */
-		params = (const char **) palloc(sizeof(char *));
-		params[0] = NULL;
-	}
+	/* the previous libxslt error context */
+	xmlGenericErrorFunc saved_errfunc;
+	void	   *saved_errcxt;
 
 	/* Setup parser */
-	xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
+	xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_ALL);
+
+	/*
+	 * Save the previous libxslt error context.
+	 */
+	saved_errfunc = xsltGenericError;
+	saved_errcxt = xsltGenericErrorContext;
+	xsltSetGenericErrorFunc(xmlerrcxt, xml_generic_error_handler);
 
 	PG_TRY();
 	{
@@ -81,19 +212,19 @@ xslt_process(PG_FUNCTION_ARGS)
 		bool		xslt_sec_prefs_error;
 		int			reslen = 0;
 
-		/* Parse document */
-		doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
-								VARSIZE_ANY_EXHDR(doct), NULL, NULL,
-								XML_PARSE_NOENT);
+		/*
+		 * Parse document.
+		 */
+		doctree = xml_parse(doct, XMLOPTION_DOCUMENT, true,
+							GetDatabaseEncoding(), NULL, NULL, NULL);
 
 		if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
 						"error parsing XML document");
 
 		/* Same for stylesheet */
-		ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
-							  VARSIZE_ANY_EXHDR(ssheet), NULL, NULL,
-							  XML_PARSE_NOENT);
+		ssdoc = xml_parse(ssheet, XMLOPTION_DOCUMENT, true,
+						  GetDatabaseEncoding(), NULL, NULL, NULL);
 
 		if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt))
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
@@ -143,9 +274,10 @@ xslt_process(PG_FUNCTION_ARGS)
 
 		resstat = xsltSaveResultToString((xmlChar **) &resstr, &reslen,
 										 restree, stylesheet);
-
-		if (resstat >= 0)
-			result = cstring_to_text_with_len((char *) resstr, reslen);
+		if (resstat < 0 || pg_xml_error_occurred(xmlerrcxt))
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
+						"failed to save result to string");
+		result = cstring_to_text_with_len((char *) resstr, reslen);
 	}
 	PG_CATCH();
 	{
@@ -163,6 +295,7 @@ xslt_process(PG_FUNCTION_ARGS)
 			xmlFree(resstr);
 		xsltCleanupGlobals();
 
+		xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc);
 		pg_xml_done(xmlerrcxt, true);
 
 		PG_RE_THROW();
@@ -179,24 +312,12 @@ xslt_process(PG_FUNCTION_ARGS)
 	if (resstr)
 		xmlFree(resstr);
 
+	xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc);
 	pg_xml_done(xmlerrcxt, false);
 
-	/* XXX this is pretty dubious, really ought to throw error instead */
-	if (resstat < 0)
-		PG_RETURN_NULL();
-
-	PG_RETURN_TEXT_P(result);
-#else							/* !USE_LIBXSLT */
-
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("xslt_process() is not available without libxslt")));
-	PG_RETURN_NULL();
-#endif							/* USE_LIBXSLT */
+	return result;
 }
 
-#ifdef USE_LIBXSLT
-
 static const char **
 parse_params(text *paramstr)
 {
diff --git a/doc/src/sgml/xml2.sgml b/doc/src/sgml/xml2.sgml
index 9fd613f9675f..dc6fb40121db 100644
--- a/doc/src/sgml/xml2.sgml
+++ b/doc/src/sgml/xml2.sgml
@@ -408,22 +408,29 @@ ORDER BY doc_num, line_num;
-xslt_process(text document, text stylesheet, text paramlist) returns text
+xslt_process(xml document, xml stylesheet, text[] paramlist) returns xml
     This function applies the XSL stylesheet to the document and returns
-    the transformed result. The paramlist is a list of parameter
-    assignments to be used in the transformation, specified in the form
-    a=1,b=2. Note that the
-    parameter parsing is very simple-minded: parameter values cannot
-    contain commas!
+    the transformed result. The paramlist is an array of parameter
+    assignments to be used in the transformation, specified in pairs of
+    key and value strings (e.g. ARRAY['a','1', 'b','2']).
+    The length of the array must be even and it must not contain null elements.
+    Note that the values are still interpreted as XPath expressions, so string values need to
+    be quoted in single or double quotes (e.g. ARRAY['a','"string"']).
     There is also a two-parameter version of xslt_process which
     does not pass any parameters to the transformation.
+
+    Deprecated variants of xslt_process accepting
+    text arguments and parameters encoded into single text strings
+    (e.g. a=1,b=2) are also still available.
+
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 66b441836956..ef59f4c4db00 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -154,11 +154,11 @@ static int	parse_xml_decl(const xmlChar *str, size_t *lenp,
 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
 						   pg_enc encoding, int standalone);
 static bool xml_doctype_in_content(const xmlChar *str);
-static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
-						   bool preserve_whitespace, int encoding,
-						   XmlOptionType *parsed_xmloptiontype,
-						   xmlNodePtr *parsed_nodes,
-						   Node *escontext);
+xmlDocPtr	xml_parse(text *data, XmlOptionType xmloption_arg,
+					  bool preserve_whitespace, int encoding,
+					  XmlOptionType *parsed_xmloptiontype,
+					  xmlNodePtr *parsed_nodes,
+					  Node *escontext);
 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
 static int	xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
 								   ArrayBuildState *astate,
@@ -1782,7 +1782,7 @@ xml_doctype_in_content(const xmlChar *str)
  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
  * yet do not use SAX - see xmlreader.c)
  */
-static xmlDocPtr
+xmlDocPtr
 xml_parse(text *data, XmlOptionType xmloption_arg,
 		  bool preserve_whitespace, int encoding,
 		  XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
@@ -1881,8 +1881,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
 			options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
 				| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
 
+			/*
+			 * Setting a dummy "SQL" URL is important for the
+			 * xsltPrintErrorContext() when using the legacy text-based
+			 * xslt_process() variant.
+			 */
 			doc = xmlCtxtReadDoc(ctxt, utf8string,
-								 NULL,	/* no URL */
+								 "SQL",
 								 "UTF-8",
 								 options);
 
@@ -2118,6 +2123,54 @@ xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
 			 detail ? errdetail_internal("%s", detail) : 0));
 }
 
+/*
+ * Generic error handler for libxml errors and warnings.
+ * This is not used by this module, but may be useful for
+ * libxml-based libraries like libxslt, which do not support
+ * structured error handlers.
+ *
+ * "data" must be the PgXmlErrorContext to collect the message in; the
+ * printf-style message is appended to its err_buf.
+ */
+void
+xml_generic_error_handler(void *data, const char *msg,...)
+{
+	PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
+
+	/*
+	 * Defend against someone passing us a bogus context struct.
+	 *
+	 * We force a backend exit if this check fails because longjmp'ing out of
+	 * libxslt would likely render it unsafe to use further.
+	 */
+	if (xmlerrcxt->magic != ERRCXT_MAGIC)
+		elog(FATAL, "xml_generic_error_handler called with invalid PgXmlErrorContext");
+
+	appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+
+	/*
+	 * appendStringInfoVA() makes only one formatting attempt and returns the
+	 * amount of space it needs if the buffer was too small.  Enlarge the
+	 * buffer and retry until the whole message has been appended; otherwise
+	 * long messages would be dropped silently.
+	 */
+	for (;;)
+	{
+		va_list		ap;
+		int			needed;
+
+		va_start(ap, msg);
+		needed = appendStringInfoVA(&xmlerrcxt->err_buf, msg, ap);
+		va_end(ap);
+
+		if (needed == 0)
+			break;
+		enlargeStringInfo(&xmlerrcxt->err_buf, needed);
+	}
+
+	/* Get rid of any trailing newlines in errorBuf */
+	chopStringInfoNewlines(&xmlerrcxt->err_buf);
+}
 
 /*
  * Error handler for libxml errors and warnings
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 0d7a816b9f93..7cb101e81d8c 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -66,6 +66,8 @@ extern void pg_xml_init_library(void);
 extern PgXmlErrorContext *pg_xml_init(PgXmlStrictness strictness);
 extern void pg_xml_done(PgXmlErrorContext *errcxt, bool isError);
 extern bool pg_xml_error_occurred(PgXmlErrorContext *errcxt);
+extern void xml_generic_error_handler(void *data, const char *msg,...)
+			pg_attribute_printf(2, 3);
 extern void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode,
 						const char *msg);
 