Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/java/org/codehaus/plexus/util/xml/XmlReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,7 @@ private static String getXMLGuessEncoding(BufferedInputStream is) throws IOExcep
}

/**
 * Regular expression that locates the {@code encoding} pseudo-attribute after a {@code <?xml}
 * opener. Capture group 1 is the attribute value including its surrounding quotes (either
 * double or single); the value must contain at least one character.
 *
 * <p>NOTE(review): this is a diff rendering, so two {@code Pattern.compile} lines appear below.
 * They differ only in the quantifier before {@code encoding}: the first uses greedy {@code .*},
 * the second reluctant {@code .*?}. Presumably the first is the removed line and the second its
 * replacement - confirm against the actual commit.
 *
 * <p>NOTE(review): the expression contains no {@code ^} or {@code $} anchors, so the
 * {@code Pattern.MULTILINE} flag looks like it has no effect here - confirm before relying on it.
 */
static final Pattern ENCODING_PATTERN =
Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
Pattern.compile("<\\?xml.*?encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);

// returns the encoding declared in the <?xml encoding=...?>, NULL if none
private static String getXmlProlog(BufferedInputStream is, String guessedEnc) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,4 +261,25 @@ void encodingAttribute() throws IOException {
xml = "<element encoding='attribute value'/>";
checkXmlContent(xml, "UTF-8");
}

/**
 * Checks that the declared encoding is still picked up from XML prologs in which the
 * {@code encoding} pseudo-attribute is preceded by other attributes or surrounded by whitespace.
 * Each prolog is passed to {@code checkXmlContent} together with the encoding it declares.
 *
 * <p>The inputs are short, so this exercises which encoding is detected rather than how long
 * the match takes.
 *
 * @throws java.io.IOException if any.
 */
@Test
void encodingPatternWithManyAttributes() throws IOException {
    // Each row is {document, encoding declared in its prolog}; rows run in the listed order.
    final String[][] prologCases = {
        // several extra attributes sit between "<?xml" and "encoding"
        {"<?xml version='1.0' a='1' b='2' c='3' d='4' e='5' encoding='UTF-8'?><root/>", "UTF-8"},
        // whitespace on both sides of '=' and before the closing "?>"
        {"<?xml version='1.0' encoding = 'US-ASCII' ?><root/>", "US-ASCII"},
        // "standalone" placed ahead of "encoding"
        {"<?xml version='1.0' standalone='yes' encoding='ISO-8859-1'?><root/>", "ISO-8859-1"},
    };

    for (final String[] prologCase : prologCases) {
        final String document = prologCase[0];
        final String declaredEncoding = prologCase[1];
        checkXmlContent(document, declaredEncoding);
    }
}
}