From e45c0a12a65bfea5ca9fbc1f7a9396bdfd01a4e0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:33:30 +0000
Subject: [PATCH 1/2] Initial plan
From 2b18f63bf8d3bd6791c968ec6e12ddcb57f80fc1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:40:35 +0000
Subject: [PATCH 2/2] Fix polynomial regex vulnerability in ENCODING_PATTERN
Changed the regex pattern from .* to .*? to use non-greedy matching,
preventing catastrophic backtracking on malicious input.
Added test case to validate the fix with various edge cases.
Co-authored-by: slachiewicz <6705942+slachiewicz@users.noreply.github.com>
---
.../codehaus/plexus/util/xml/XmlReader.java | 2 +-
.../plexus/util/xml/XmlStreamReaderTest.java | 21 +++++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java
index f2a0316..6f0a0db 100644
--- a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java
+++ b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java
@@ -597,7 +597,7 @@ private static String getXMLGuessEncoding(BufferedInputStream is) throws IOExcep
}
static final Pattern ENCODING_PATTERN =
- Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
+ Pattern.compile("<\\?xml.*?encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
// returns the encoding declared in the <?xml prolog, NULL if none
private static String getXmlProlog(BufferedInputStream is, String guessedEnc) throws IOException {
diff --git a/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java b/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java
index 0390ac2..17ba924 100644
--- a/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java
+++ b/src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java
@@ -261,4 +261,25 @@ void encodingAttribute() throws IOException {
xml = "";
checkXmlContent(xml, "UTF-8");
}
+
+ /**
+ * Test that the regex pattern handles edge cases efficiently without catastrophic backtracking.
+ * This validates the fix for polynomial regex vulnerability.
+ *
+ * @throws java.io.IOException if any.
+ */
+ @Test
+ void encodingPatternWithManyAttributes() throws IOException {
+ // Test with many attributes before encoding to ensure non-greedy matching works
+ String xml = "";
+ checkXmlContent(xml, "UTF-8");
+
+ // Test with whitespace variations
+ xml = "";
+ checkXmlContent(xml, "US-ASCII");
+
+ // Test with longer prolog (but still valid)
+ xml = "";
+ checkXmlContent(xml, "ISO-8859-1");
+ }
}