diff --git a/doc/src/sgml/func/func-xml.sgml b/doc/src/sgml/func/func-xml.sgml index 21f34467a4f8..0e958bda2ee6 100644 --- a/doc/src/sgml/func/func-xml.sgml +++ b/doc/src/sgml/func/func-xml.sgml @@ -61,6 +61,56 @@ SELECT xmltext('< foo & bar >'); + + <literal>xmlcanonicalize</literal> + + + xmlcanonicalize + + + +xmlcanonicalize ( doc xml [, keep_comments boolean DEFAULT true] ) xml + + + + + This function transforms a given XML document into its canonical form, + as defined by the W3C Canonical XML 1.1 Specification, which standardizes the document's + structure and syntax to facilitate comparison and validation. + The keep_comments parameter controls whether XML comments from the input document are preserved or discarded. + If omitted, it defaults to true. + + + + Example: + + + 42 + + '::xml); + xmlcanonicalize +----------------------------------------------------------------------------- + 42 +(1 row) + +SELECT + xmlcanonicalize( + ' + + 42 + + '::xml, false); + xmlcanonicalize +----------------------------------------------------------- + 42 +(1 row) +]]> + + + <literal>xmlcomment</literal> diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 2d946d6d9e9b..287cc6598937 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -268,6 +268,12 @@ CREATE OR REPLACE FUNCTION xpath_exists(text, xml) IMMUTABLE PARALLEL SAFE STRICT COST 1 RETURN xpath_exists($1, $2, '{}'::text[]); +CREATE OR REPLACE FUNCTION xmlcanonicalize(xml, boolean DEFAULT true) + RETURNS xml + LANGUAGE internal + IMMUTABLE PARALLEL SAFE STRICT +AS 'xmlcanonicalize'; + CREATE OR REPLACE FUNCTION pg_sleep_for(interval) RETURNS void LANGUAGE sql diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 35c915573a1d..bc7ccfc4bff9 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -58,6 +58,7 @@ #include #include #include +#include /* * We used to check for 
xmlStructuredErrorContext via a configure test; but
@@ -565,6 +566,91 @@ xmltext(PG_FUNCTION_ARGS)
 #endif							/* not USE_LIBXML */
 }
+/*
+ * xmlcanonicalize
+ *
+ * SQL-callable function that returns the canonical form of an XML document,
+ * produced by libxml2's xmlC14NDocDumpMemory() using the Canonical XML 1.1
+ * algorithm.  The canonical form is deterministic, which makes it suitable
+ * for digital signatures and for comparing logically equivalent documents.
+ *
+ * Argument 0 is the XML value; it is parsed with XMLOPTION_DOCUMENT, so input
+ * that is not a well-formed document (e.g. a content fragment) is rejected
+ * with an error.  Argument 1 selects whether comments are kept (true) or
+ * dropped (false) in the output.
+ *
+ * An error is also raised if libxml2 fails to canonicalize the document.
+ * All libxml2 allocations are released on both the normal and error paths.
+ */
+Datum
+xmlcanonicalize(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	xmltype    *arg = PG_GETARG_XML_P(0);
+	bool		keep_comments = PG_GETARG_BOOL(1);
+	text	   *result;
+	xmlChar    *volatile xmlbuf = NULL;
+	volatile xmlDocPtr doc = NULL;
+	PgXmlErrorContext *xmlerrcxt;
+
+	/* Set up the XML error context so libxml2 errors are captured */
+	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+	PG_TRY();
+	{
+		xmlChar    *c14n_out = NULL;
+		int			nbytes;
+
+		/*
+		 * Parse the input as a full XML document.  The trailing Node *
+		 * argument is left NULL: xmlerrcxt is a PgXmlErrorContext, not a
+		 * Node, so it must not be cast and passed there.
+		 */
+		doc = xml_parse(arg, XMLOPTION_DOCUMENT, false,
+						GetDatabaseEncoding(), NULL, NULL, NULL);
+
+		/*
+		 * Canonicalize the entire document (nodes = NULL) with all
+		 * namespaces (inclusive_ns_prefixes = NULL).  libxml2 writes its
+		 * result through a plain local pointer, which is copied into the
+		 * volatile-qualified xmlbuf at once so the cleanup code can free it.
+		 */
+		nbytes = xmlC14NDocDumpMemory(doc, NULL, XML_C14N_1_1, NULL,
+									  keep_comments ? 1 : 0, &c14n_out);
+		xmlbuf = c14n_out;
+
+		/* Negative size, no buffer, or a libxml2 error all mean failure */
+		if (nbytes < 0 || xmlbuf == NULL || xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not canonicalize XML document");
+
+		result = cstring_to_text_with_len((const char *) xmlbuf, nbytes);
+	}
+	PG_CATCH();
+	{
+		/* Free libxml2 allocations before re-throwing */
+		if (doc)
+			xmlFreeDoc(doc);
+		if (xmlbuf)
+			xmlFree(xmlbuf);
+
+		pg_xml_done(xmlerrcxt, true);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	if (doc)
+		xmlFreeDoc(doc);
+	if (xmlbuf)
+		xmlFree(xmlbuf);
+	pg_xml_done(xmlerrcxt, false);
+
+	PG_RETURN_XML_P(result);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif							/* not USE_LIBXML */
+}
 
 /*
  * TODO: xmlconcat needs to merge the notations and unparsed entities
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 9121a382f76b..2afc1e255a71 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9179,6 +9179,9 @@
 { oid => '3813', descr => 'generate XML text node',
   proname => 'xmltext', prorettype => 'xml', proargtypes => 'text',
   prosrc => 'xmltext' },
+{ oid => '3814', descr => 'generate the canonical form of an XML document',
+  proname => 'xmlcanonicalize', prorettype => 'xml', proargtypes => 'xml bool',
+  prosrc => 'xmlcanonicalize' },
 
 { oid => '2923', descr => 'map table contents to XML',
   proname => 'table_to_xml', procost => '100', provolatile => 's',
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 103a22a3b1d3..688c0fc3e9de 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1881,3 +1881,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 73c411118a39..8bc3ac1c9668 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -1496,3 +1496,83 @@ ERROR: unsupported XML feature LINE 1: SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j':... ^ DETAIL: This functionality requires the server to be built with libxml support. +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +ERROR: unsupported XML feature +LINE 2: (' + ^ +DETAIL: This functionality requires the server to be built with libxml support. +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? +---------- +(0 rows) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- +(0 rows) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' ', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo', true); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(''); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize(' '); +ERROR: unsupported XML feature at character 24 +SELECT xmlcanonicalize('foo'); +ERROR: unsupported XML feature at character 24 +\set VERBOSITY default diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out index a85d95358d90..4ce36ff82571 100644 --- a/src/test/regress/expected/xml_2.out +++ b/src/test/regress/expected/xml_2.out @@ -1867,3 +1867,85 @@ SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); x<P>73</P>0.42truej (1 row) +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + 421 321 value>"0" && value<"10" ?"valid":"error"+ + +(1 row) + +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; + xmlcanonicalize +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 421 321 value>"0" && value<"10" ?"valid":"error" +(1 row) + +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; + ?column? 
+---------- + t +(1 row) + +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; + xmlcanonicalize +----------------- + +(1 row) + +SELECT xmlcanonicalize(NULL, true); + xmlcanonicalize +----------------- + +(1 row) + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(' ', true); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo', true); +ERROR: invalid XML document +SELECT xmlcanonicalize(''); +ERROR: invalid XML document +SELECT xmlcanonicalize(' '); +ERROR: invalid XML document +SELECT xmlcanonicalize('foo'); +ERROR: invalid XML document +\set VERBOSITY default diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 0ea4f508837c..4af51a9908f8 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -679,3 +679,55 @@ SELECT xmltext(' '); SELECT xmltext('foo `$_-+?=*^%!|/\()[]{}'); SELECT xmltext('foo & <"bar">'); SELECT xmltext('x'|| '

73

'::xml || .42 || true || 'j'::char); + +-- xmlserialize: canonical +CREATE TABLE xmlcanonicalize_test (doc xml); +INSERT INTO xmlcanonicalize_test VALUES + (' + + + ]> + + + + + + + &val; + + + + + + 1 + + + 321 + + + + + + + + "0" && value<"10" ?"valid":"error"]]> + '); + +SELECT xmlcanonicalize(doc, true) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, false) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, true)::text = xmlcanonicalize(doc)::text FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(doc, NULL) FROM xmlcanonicalize_test; +SELECT xmlcanonicalize(NULL, true); + +\set VERBOSITY terse +SELECT xmlcanonicalize('', true); +SELECT xmlcanonicalize(' ', true); +SELECT xmlcanonicalize('foo', true); +SELECT xmlcanonicalize(''); +SELECT xmlcanonicalize(' '); +SELECT xmlcanonicalize('foo'); +\set VERBOSITY default \ No newline at end of file