From 3b82dffb11c8d0975c9df3e49142a745b387bb9a Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Thu, 17 Apr 2025 13:15:48 +0300 Subject: [PATCH 1/2] contrib/xml2: xslt_process() now reports XSLT-related error details * It sets (and restores) the libxslt error handler (xsltSetGenericErrorFunc()). Since libxslt only supports old-school "generic" error handlers, which are no longer used in PG's libxml code, we reintroduced a "generic" error handler xml_generic_error_handler() in xml.c. * The alternative would have been to expose PgXmlErrorContext in xml.h, so we could implement a generic handler in xslt_proc.c. This is obviously not desirable, as it would depend on USE_LIBXML. * It no longer uses PG_XML_STRICTNESS_LEGACY for error handling, as the err_occurred flag has already been checked via pg_xml_error_occurred() since commit 732061150b004385810e522f8629f5bf91d977b7. * This change means that xslt_process() now reports not only details about XML parsing errors, but also XSLT-schema deviations and missing stylesheet parameters as well. * The XSLT error messages now also contain line numbers. For that to work, we had to set a dummy "SQL" URL when parsing XML strings. This is important, since xsltPrintErrorContext() includes line numbers only if a URL is set. * The special xsltSaveResultToString() error handling has been removed. It can practically only fail in OOM situations and there is no reason to handle them any differently than in the other libxslt functions. * Updated the test suite and added a test case for detecting missing stylesheet parameters. 
This was initially reported here but has obviously been fixed in the meantime: https://www.postgresql.org/message-id/4C5ECEF9.3030806%40mlfowler.com --- contrib/xml2/expected/xml2.out | 17 +++++++++++++++++ contrib/xml2/sql/xml2.sql | 8 ++++++++ contrib/xml2/xslt_proc.c | 35 +++++++++++++++++++++++----------- src/backend/utils/adt/xml.c | 29 ++++++++++++++++++++++++++++ src/include/utils/xml.h | 2 ++ 5 files changed, 80 insertions(+), 11 deletions(-) diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out index 3d97b14c3a1e..157d584e633e 100644 --- a/contrib/xml2/expected/xml2.out +++ b/contrib/xml2/expected/xml2.out @@ -261,3 +261,20 @@ $$ $$); ERROR: failed to apply stylesheet +DETAIL: runtime error: file SQL line 7 element output +File write for 0wn3d.txt refused +runtime error: file SQL line 7 element output +xsltDocumentElem: write rights for 0wn3d.txt denied +-- detecting missing stylesheet parameter +SELECT xslt_process('', +$$ + +$$)::xml; +ERROR: failed to apply stylesheet +DETAIL: runtime error: file SQL line 3 element value-of +Variable 'n1' has not been declared. +Undefined variable +runtime error: file SQL line 3 element value-of +XPath evaluation returned no result. 
diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql index ef99d164f272..9d42ac8a0b1a 100644 --- a/contrib/xml2/sql/xml2.sql +++ b/contrib/xml2/sql/xml2.sql @@ -153,3 +153,11 @@ $$ $$); + +-- detecting missing stylesheet parameter +SELECT xslt_process('', +$$ + +$$)::xml; diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index 53550c7dc240..17776f78b535 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -60,6 +60,10 @@ xslt_process(PG_FUNCTION_ARGS) volatile int resstat = -1; xmlChar *volatile resstr = NULL; + /* the previous libxslt error context */ + xmlGenericErrorFunc saved_errfunc; + void *saved_errcxt; + if (fcinfo->nargs == 3) { paramstr = PG_GETARG_TEXT_PP(2); @@ -73,7 +77,14 @@ xslt_process(PG_FUNCTION_ARGS) } /* Setup parser */ - xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_ALL); + + /* + * Save the previous libxslt error context. + */ + saved_errfunc = xsltGenericError; + saved_errcxt = xsltGenericErrorContext; + xsltSetGenericErrorFunc(xmlerrcxt, xml_generic_error_handler); PG_TRY(); { @@ -81,9 +92,12 @@ xslt_process(PG_FUNCTION_ARGS) bool xslt_sec_prefs_error; int reslen = 0; - /* Parse document */ + /* + * Parse document. It's important to set an "URL", so libxslt includes + * line numbers in error messages (cf. xsltPrintErrorContext()). 
+ */ doctree = xmlReadMemory((char *) VARDATA_ANY(doct), - VARSIZE_ANY_EXHDR(doct), NULL, NULL, + VARSIZE_ANY_EXHDR(doct), "SQL", NULL, XML_PARSE_NOENT); if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt)) @@ -92,7 +106,7 @@ xslt_process(PG_FUNCTION_ARGS) /* Same for stylesheet */ ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet), - VARSIZE_ANY_EXHDR(ssheet), NULL, NULL, + VARSIZE_ANY_EXHDR(ssheet), "SQL", NULL, XML_PARSE_NOENT); if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt)) @@ -143,9 +157,10 @@ xslt_process(PG_FUNCTION_ARGS) resstat = xsltSaveResultToString((xmlChar **) &resstr, &reslen, restree, stylesheet); - - if (resstat >= 0) - result = cstring_to_text_with_len((char *) resstr, reslen); + if (resstat < 0 || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY, + "failed to save result to string"); + result = cstring_to_text_with_len((char *) resstr, reslen); } PG_CATCH(); { @@ -163,6 +178,7 @@ xslt_process(PG_FUNCTION_ARGS) xmlFree(resstr); xsltCleanupGlobals(); + xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc); pg_xml_done(xmlerrcxt, true); PG_RE_THROW(); @@ -179,12 +195,9 @@ xslt_process(PG_FUNCTION_ARGS) if (resstr) xmlFree(resstr); + xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc); pg_xml_done(xmlerrcxt, false); - /* XXX this is pretty dubious, really ought to throw error instead */ - if (resstat < 0) - PG_RETURN_NULL(); - PG_RETURN_TEXT_P(result); #else /* !USE_LIBXSLT */ diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 66b441836956..7baace398368 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -2118,6 +2118,35 @@ xml_errsave(Node *escontext, PgXmlErrorContext *errcxt, detail ? errdetail_internal("%s", detail) : 0)); } +/* + * Generic error handler for libxml errors and warnings. 
+ * This is not used by this module, but may be useful for + * libxml-based libraries like libxslt, which do not support + * structured error handlers. + */ +void +xml_generic_error_handler(void *data, const char *msg,...) +{ + PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data; + va_list ap; + + /* + * Defend against someone passing us a bogus context struct. + * + * We force a backend exit if this check fails because longjmp'ing out of + * libxslt would likely render it unsafe to use further. + */ + if (xmlerrcxt->magic != ERRCXT_MAGIC) + elog(FATAL, "xml_generic_error_handler called with invalid PgXmlErrorContext"); + + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + va_start(ap, msg); + appendStringInfoVA(&xmlerrcxt->err_buf, msg, ap); + va_end(ap); + + /* Get rid of any trailing newlines in errorBuf */ + chopStringInfoNewlines(&xmlerrcxt->err_buf); +} /* * Error handler for libxml errors and warnings diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h index 0d7a816b9f93..7cb101e81d8c 100644 --- a/src/include/utils/xml.h +++ b/src/include/utils/xml.h @@ -66,6 +66,8 @@ extern void pg_xml_init_library(void); extern PgXmlErrorContext *pg_xml_init(PgXmlStrictness strictness); extern void pg_xml_done(PgXmlErrorContext *errcxt, bool isError); extern bool pg_xml_error_occurred(PgXmlErrorContext *errcxt); +extern void xml_generic_error_handler(void *data, const char *msg,...) + pg_attribute_printf(2, 3); extern void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg); From a7b185f8f2ce8dc66dc322ba413a2a840a96ae80 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Wed, 30 Apr 2025 13:19:42 +0300 Subject: [PATCH 2/2] contrib/xml2: overloaded xslt_process() to provide variants for xmltype and specifying parameters in arrays * There are apparently no functions that accept XML as text, except for xmlparse(). xslt_process() should therefore also accept xmltype. 
* A version accepting text is still kept for backwards compatibility, but is considered deprecated. * The new xmltype-based version expects an array of stylesheet parameter-value pairs, which is less limited than the now deprecated way of encoding all stylesheet parameters into a single text argument. We can now accept an arbitrary number of parameters and you can include `=` and `,` signs in both the key and value strings. hstore has not been used since it lives in a separate module and we don't want to depend on any additional module. * The new implementation respects the database's encoding - text strings are always converted to UTF8 before being passed to libxml2. * On the downside, xml_parse() had to be made an external function. Since a declaration cannot be added to xml.h without drawing in libxml2 headers, the declaration is repeated in xslt_proc.c. Perhaps xml_parse() should be declared in a separate internal header? * xmlCtxtReadDoc() now sets a dummy "SQL" URL to preserve line numbers in XSLT stylesheet errors. This change at least does not break the test suite. --- contrib/xml2/expected/xml2.out | 13 +++ contrib/xml2/sql/xml2.sql | 8 ++ contrib/xml2/xml2--1.1.sql | 11 +++ contrib/xml2/xslt_proc.c | 148 +++++++++++++++++++++++++-------- doc/src/sgml/xml2.sgml | 19 +++-- src/backend/utils/adt/xml.c | 19 +++-- 6 files changed, 172 insertions(+), 46 deletions(-) diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out index 157d584e633e..0a8a62802030 100644 --- a/contrib/xml2/expected/xml2.out +++ b/contrib/xml2/expected/xml2.out @@ -278,3 +278,16 @@ Variable 'n1' has not been declared. Undefined variable runtime error: file SQL line 3 element value-of XPath evaluation returned no result. 
+-- xmltype and Array-based signature +SELECT xslt_process(xmlelement(name xml), +$$ + +$$::xml, ARRAY['n1','"foo"']); + xslt_process +-------------- + foo + + +(1 row) + diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql index 9d42ac8a0b1a..7555854d494f 100644 --- a/contrib/xml2/sql/xml2.sql +++ b/contrib/xml2/sql/xml2.sql @@ -161,3 +161,11 @@ $$ $$)::xml; + +-- xmltype and Array-based signature +SELECT xslt_process(xmlelement(name xml), +$$ + +$$::xml, ARRAY['n1','"foo"']); diff --git a/contrib/xml2/xml2--1.1.sql b/contrib/xml2/xml2--1.1.sql index 671372cb2711..a579a1e5e187 100644 --- a/contrib/xml2/xml2--1.1.sql +++ b/contrib/xml2/xml2--1.1.sql @@ -71,3 +71,14 @@ CREATE FUNCTION xslt_process(text,text) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xslt_process(xml,xml,text[]) +RETURNS xml +AS 'MODULE_PATHNAME','xslt_process_xmltype' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +-- the function checks for the correct argument count +CREATE FUNCTION xslt_process(xml,xml) +RETURNS xml +AS 'MODULE_PATHNAME','xslt_process_xmltype' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index 17776f78b535..074952cf8bca 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -10,6 +10,9 @@ #include "fmgr.h" #include "utils/builtins.h" #include "utils/xml.h" +#include "utils/array.h" +#include "utils/memutils.h" +#include "mb/pg_wchar.h" #ifdef USE_LIBXSLT @@ -35,9 +38,18 @@ extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); /* local defs */ +static xmltype *xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params); static const char **parse_params(text *paramstr); #endif /* USE_LIBXSLT */ +/* + * FIXME: This cannot easily be exposed in xml.h. + * Perhaps there should be an xml-internal.h? 
+ */ +xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, + bool preserve_whitespace, int encoding, + XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes, + Node *escontext); PG_FUNCTION_INFO_V1(xslt_process); @@ -48,9 +60,103 @@ xslt_process(PG_FUNCTION_ARGS) text *doct = PG_GETARG_TEXT_PP(0); text *ssheet = PG_GETARG_TEXT_PP(1); - text *volatile result = NULL; - text *paramstr; - const char **params; + const char **params = NULL; + text *result; + + if (fcinfo->nargs == 3) + { + text *paramstr = PG_GETARG_TEXT_PP(2); + + params = parse_params(paramstr); + } + + result = xslt_process_internal(doct, ssheet, params); + + PG_RETURN_TEXT_P(result); + +#else /* !USE_LIBXSLT */ + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("xslt_process() is not available without libxslt"))); + PG_RETURN_NULL(); + +#endif /* USE_LIBXSLT */ +} + +PG_FUNCTION_INFO_V1(xslt_process_xmltype); + +Datum +xslt_process_xmltype(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXSLT + + xmltype *doct = PG_GETARG_XML_P(0); + xmltype *ssheet = PG_GETARG_XML_P(1); + const char **params = NULL; + xmltype *result; + + /* + * Parameters are key-value pairs. The values are XPath expressions, so + * strings will have to be escaped with single or double quotes. Even + * `xsltproc --stringparam` does nothing else than adding single or double + * quotes and fails if the value contains both. 
+ */ + if (fcinfo->nargs == 3) + { + ArrayType *paramarray = PG_GETARG_ARRAYTYPE_P(2); + Datum *arr_datums; + bool *arr_nulls; + int arr_count; + int i, + j; + + deconstruct_array_builtin(paramarray, TEXTOID, &arr_datums, &arr_nulls, &arr_count); + + if ((arr_count % 2) != 0) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("number of stylesheet parameters (%d) must be a multiple of 2", + arr_count))); + + params = palloc_array(const char *, arr_count + 1); + + for (i = 0, j = 0; i < arr_count; i++) + { + char *cstr; + + if (arr_nulls[i]) + continue; + + cstr = TextDatumGetCString(arr_datums[i]); + params[j++] = (char *) pg_do_encoding_conversion((unsigned char *) cstr, + strlen(cstr), + GetDatabaseEncoding(), + PG_UTF8); + } + params[j] = NULL; + } + + result = xslt_process_internal(doct, ssheet, params); + + PG_RETURN_XML_P(result); + +#else /* !USE_LIBXSLT */ + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("xslt_process() is not available without libxslt"))); + PG_RETURN_NULL(); + +#endif /* USE_LIBXSLT */ +} + +#ifdef USE_LIBXSLT + +static xmltype * +xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params) +{ + text *volatile result; PgXmlErrorContext *xmlerrcxt; volatile xsltStylesheetPtr stylesheet = NULL; volatile xmlDocPtr doctree = NULL; @@ -64,18 +170,6 @@ xslt_process(PG_FUNCTION_ARGS) xmlGenericErrorFunc saved_errfunc; void *saved_errcxt; - if (fcinfo->nargs == 3) - { - paramstr = PG_GETARG_TEXT_PP(2); - params = parse_params(paramstr); - } - else - { - /* No parameters */ - params = (const char **) palloc(sizeof(char *)); - params[0] = NULL; - } - /* Setup parser */ xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_ALL); @@ -93,21 +187,18 @@ xslt_process(PG_FUNCTION_ARGS) int reslen = 0; /* - * Parse document. It's important to set an "URL", so libxslt includes - * line numbers in error messages (cf. xsltPrintErrorContext()). + * Parse document. 
*/ - doctree = xmlReadMemory((char *) VARDATA_ANY(doct), - VARSIZE_ANY_EXHDR(doct), "SQL", NULL, - XML_PARSE_NOENT); + doctree = xml_parse(doct, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding(), NULL, NULL, NULL); if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "error parsing XML document"); /* Same for stylesheet */ - ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet), - VARSIZE_ANY_EXHDR(ssheet), "SQL", NULL, - XML_PARSE_NOENT); + ssdoc = xml_parse(ssheet, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding(), NULL, NULL, NULL); if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, @@ -198,18 +289,9 @@ xslt_process(PG_FUNCTION_ARGS) xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc); pg_xml_done(xmlerrcxt, false); - PG_RETURN_TEXT_P(result); -#else /* !USE_LIBXSLT */ - - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("xslt_process() is not available without libxslt"))); - PG_RETURN_NULL(); -#endif /* USE_LIBXSLT */ + return result; } -#ifdef USE_LIBXSLT - static const char ** parse_params(text *paramstr) { diff --git a/doc/src/sgml/xml2.sgml b/doc/src/sgml/xml2.sgml index 9fd613f9675f..dc6fb40121db 100644 --- a/doc/src/sgml/xml2.sgml +++ b/doc/src/sgml/xml2.sgml @@ -408,22 +408,29 @@ ORDER BY doc_num, line_num; -xslt_process(text document, text stylesheet, text paramlist) returns text +xslt_process(xml document, xml stylesheet, text[] paramlist) returns xml This function applies the XSL stylesheet to the document and returns - the transformed result. The paramlist is a list of parameter - assignments to be used in the transformation, specified in the form - a=1,b=2. Note that the - parameter parsing is very simple-minded: parameter values cannot - contain commas! + the transformed result. 
The paramlist is an array of parameter + assignments to be used in the transformation, specified in pairs of + key and value strings (e.g. ARRAY['a','1', 'b','2']). + The length of the array must be even. + Note that the values are still interpreted as XPath expressions, so string values need to + be quoted in single or double quotes (e.g. ARRAY['a','"string"']). There is also a two-parameter version of xslt_process which does not pass any parameters to the transformation. + + + Deprecated variants of xslt_process accepting + text arguments and parameters encoded into single text strings + (e.g. a=1,b=2) are also still available. + diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 7baace398368..ef59f4c4db00 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -154,11 +154,11 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp, static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone); static bool xml_doctype_in_content(const xmlChar *str); -static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, - bool preserve_whitespace, int encoding, - XmlOptionType *parsed_xmloptiontype, - xmlNodePtr *parsed_nodes, - Node *escontext); +xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, + bool preserve_whitespace, int encoding, + XmlOptionType *parsed_xmloptiontype, + xmlNodePtr *parsed_nodes, + Node *escontext); static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt); static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ArrayBuildState *astate, @@ -1782,7 +1782,7 @@ xml_doctype_in_content(const xmlChar *str) * TODO maybe libxml2's xmlreader is better? 
(do not construct DOM, * yet do not use SAX - see xmlreader.c) */ -static xmlDocPtr +xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int encoding, XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes, @@ -1881,8 +1881,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, options = XML_PARSE_NOENT | XML_PARSE_DTDATTR | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS); + /* + * Setting a dummy "SQL" URL is important for the + * xsltPrintErrorContext() when using the legacy text-based + * xslt_process() variant. + */ doc = xmlCtxtReadDoc(ctxt, utf8string, - NULL, /* no URL */ + "SQL", "UTF-8", options);