diff --git a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java index f2a0316..6f0a0db 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java +++ b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java @@ -597,7 +597,7 @@ private static String getXMLGuessEncoding(BufferedInputStream is) throws IOExcep } static final Pattern ENCODING_PATTERN = - Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE); + Pattern.compile("<\\?xml.*?encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE); // returns the encoding declared in the <?xml encoding=...?>, NULL if none private static String getXmlProlog(BufferedInputStream is, String guessedEnc) throws IOException { diff --git a/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java b/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java index 0390ac2..17ba924 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java @@ -261,4 +261,25 @@ void encodingAttribute() throws IOException { xml = ""; checkXmlContent(xml, "UTF-8"); } + + /** + * Test that the regex pattern handles edge cases efficiently without catastrophic backtracking. + * This validates the fix for polynomial regex vulnerability. + * + * @throws java.io.IOException if any. + */ + @Test + void encodingPatternWithManyAttributes() throws IOException { + // Test with many attributes before encoding to ensure non-greedy matching works + String xml = ""; + checkXmlContent(xml, "UTF-8"); + + // Test with whitespace variations + xml = ""; + checkXmlContent(xml, "US-ASCII"); + + // Test with longer prolog (but still valid) + xml = ""; + checkXmlContent(xml, "ISO-8859-1"); + } }