From 18ba20e16251896692f3756446970b39d8ab56f1 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan
Date: Sun, 15 May 2011 22:28:16 -0400
Subject: [PATCH 1/2] progress

---
 src/backend/utils/adt/xml.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index ee82d4616c..8dc7537e0d 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -142,6 +142,21 @@ static void SPI_sql_row_to_xmlelement(int rownum, StringInfo result,
 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
 
+/* forbidden C0 control chars */
+#define FORBIDDEN_C0 \
+	"\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11" \
+	"\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+
+static inline void
+check_forbidden_chars(char * str)
+{
+	if (strpbrk(str,FORBIDDEN_C0) != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("character out of range"),
+				 errdetail("XML does not support control characters.")));
+
+}
 
 #ifdef USE_LIBXML
 
@@ -411,6 +426,8 @@ xmlcomment(PG_FUNCTION_ARGS)
 	appendStringInfoText(&buf, arg);
 	appendStringInfo(&buf, "-->");
 
+	check_forbidden_chars(buf.data);
+
 	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
 #else
 	NO_XML_SUPPORT();
@@ -718,6 +735,8 @@ xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
 	}
 	appendStringInfoString(&buf, "?>");
 
+	check_forbidden_chars(buf.data);
+
 	result = stringinfo_to_xmltype(&buf);
 	pfree(buf.data);
 	return result;
@@ -741,6 +760,8 @@ xmlroot(xmltype *data, text *version, int standalone)
 	len = VARSIZE(data) - VARHDRSZ;
 	str = text_to_cstring((text *) data);
 
+	check_forbidden_chars(str);
+
 	parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
 
 	if (version)
@@ -1184,6 +1205,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
 										   encoding,
 										   PG_UTF8);
 
+	/* check for illegal XML chars */
+	check_forbidden_chars((char *) utf8string);
+
 	/* Start up libxml and its parser (no-ops if already done) */
 	pg_xml_init();
 	xmlInitParser();
@@ -1804,6 +1828,9 @@ map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
 				getTypeOutputInfo(type, &typeOut, &isvarlena);
 				str = OidOutputFunctionCall(typeOut, value);
 
+				/* check for illegal XML chars */
+				check_forbidden_chars(str);
+
 				/* ... exactly as-is for XML, and when escaping is not wanted */
 				if (type == XMLOID || !xml_escape_strings)
 					return str;

From 43c7b34b0393afa01c7bc90b36e4940d6e60f22b Mon Sep 17 00:00:00 2001
From: Andrew Dunstan
Date: Mon, 23 May 2011 11:41:38 -0400
Subject: [PATCH 2/2] check forbidden upper utf8 chars

---
Review notes (text between "---" and the diff is ignored by git am):
 - The previous loop passed a zero length to xmlGetUTF8Char(), which makes
   that call fail without consuming input, so the scan never advanced.
 - U+FFFE was tested twice and U+FFFF not at all; the surrogate range
   started at 0xDF00 instead of 0xD800.
 - check_forbidden_chars() is placed before "#ifdef USE_LIBXML", so it must
   not call libxml; UTF-8 is now decoded locally.
 - Overlong encodings are not diagnosed here; presumably input has already
   passed encoding validation -- confirm against the callers.
 - Blob ids on the "index" line were not regenerated for this revision.

 src/backend/utils/adt/xml.c | 84 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 79 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 8dc7537e0d..902c4c7c2e 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -147,15 +147,89 @@ static void SPI_sql_row_to_xmlelement(int rownum, StringInfo result,
 	"\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11" \
 	"\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
 
+/*
+ * check_forbidden_chars
+ *		Raise an error if str contains a character XML 1.0 does not allow.
+ *
+ * XML 1.0 permits only #x9, #xA, #xD, #x20-#xD7FF, #xE000-#xFFFD and
+ * #x10000-#x10FFFF.  The forbidden C0 controls are single bytes, so they
+ * are searched for directly.  Code points above 0x7F are only examined when
+ * the database encoding is UTF-8.  The sequences are decoded here rather
+ * than with libxml because this function sits outside #ifdef USE_LIBXML.
+ */
 static inline void
-check_forbidden_chars(char * str)
+check_forbidden_chars(const char *str)
 {
-	if (strpbrk(str,FORBIDDEN_C0) != NULL)
+	const unsigned char *p;
+	const char *errchar;
+
+	if ((errchar = strpbrk(str, FORBIDDEN_C0)) != NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("character out of range"),
-				 errdetail("XML does not support control characters.")));
-
+				 errmsg("illegal XML character \\u%.4x",
+						(unsigned int) (unsigned char) *errchar)));
+
+	/* code points above 0x7F can only be identified in UTF-8 */
+	if (GetDatabaseEncoding() != PG_UTF8)
+		return;
+
+	p = (const unsigned char *) str;
+	while (*p != '\0')
+	{
+		unsigned int cp = 0;
+		int			len = 0;
+		int			i;
+
+		if (!IS_HIGHBIT_SET(*p))
+		{
+			/* already checked the forbidden ASCII chars */
+			p++;
+			continue;
+		}
+
+		/* the lead byte gives the sequence length and the high bits */
+		if ((*p & 0xe0) == 0xc0)
+		{
+			len = 2;
+			cp = *p & 0x1f;
+		}
+		else if ((*p & 0xf0) == 0xe0)
+		{
+			len = 3;
+			cp = *p & 0x0f;
+		}
+		else if ((*p & 0xf8) == 0xf0)
+		{
+			len = 4;
+			cp = *p & 0x07;
+		}
+
+		/* fold in continuation bytes; a premature NUL fails the test too */
+		for (i = 1; i < len; i++)
+		{
+			if ((p[i] & 0xc0) != 0x80)
+			{
+				len = 0;
+				break;
+			}
+			cp = (cp << 6) | (p[i] & 0x3f);
+		}
+
+		if (len == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid UTF-8 byte sequence starting with 0x%02x",
+							(unsigned int) *p)));
+
+		/* surrogates, U+FFFE and U+FFFF are not XML characters */
+		if ((cp >= 0xd800 && cp <= 0xdfff) ||
+			cp == 0xfffe || cp == 0xffff || cp > 0x10ffff)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("illegal XML character \\u%.4x", cp)));
+
+		p += len;
+	}
 }
 
 #ifdef USE_LIBXML