Skip to content

Commit bab1398

Browse files
authored
gh-126033: fix UAF in xml.etree.ElementTree.Element.remove when concurrent mutations happen (#126124)
1 parent c57623c commit bab1398

File tree

3 files changed

+213
-36
lines changed

3 files changed

+213
-36
lines changed

Lib/test/test_xml_etree.py

+178-10
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
import textwrap
1919
import types
2020
import unittest
21+
import unittest.mock as mock
2122
import warnings
2223
import weakref
2324

25+
from contextlib import nullcontext
2426
from functools import partial
2527
from itertools import product, islice
2628
from test import support
@@ -121,6 +123,21 @@
121123
</foo>
122124
"""
123125

126+
def is_python_implementation():
127+
assert ET is not None, "ET must be initialized"
128+
assert pyET is not None, "pyET must be initialized"
129+
return ET is pyET
130+
131+
132+
def equal_wrapper(cls):
133+
"""Mock cls.__eq__ to check whether it has been called or not.
134+
135+
The behaviour of cls.__eq__ (side-effects included) is left as is.
136+
"""
137+
eq = cls.__eq__
138+
return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)
139+
140+
124141
def checkwarnings(*filters, quiet=False):
125142
def decorator(test):
126143
def newtest(*args, **kwargs):
@@ -2642,6 +2659,7 @@ def test_pickle_issue18997(self):
26422659

26432660

26442661
class BadElementTest(ElementTestCase, unittest.TestCase):
2662+
26452663
def test_extend_mutable_list(self):
26462664
class X:
26472665
@property
@@ -2680,18 +2698,168 @@ class Y(X, ET.Element):
26802698
e = ET.Element('foo')
26812699
e.extend(L)
26822700

2683-
def test_remove_with_mutating(self):
2684-
class X(ET.Element):
2701+
def test_remove_with_clear_assume_missing(self):
2702+
# gh-126033: Check that a concurrent clear() for an assumed-to-be
2703+
# missing element does not make the interpreter crash.
2704+
self.do_test_remove_with_clear(raises=True)
2705+
2706+
def test_remove_with_clear_assume_existing(self):
2707+
# gh-126033: Check that a concurrent clear() for an assumed-to-be
2708+
# existing element does not make the interpreter crash.
2709+
self.do_test_remove_with_clear(raises=False)
2710+
2711+
def do_test_remove_with_clear(self, *, raises):
2712+
2713+
# Until the discrepency between "del root[:]" and "root.clear()" is
2714+
# resolved, we need to keep two tests. Previously, using "del root[:]"
2715+
# did not crash with the reproducer of gh-126033 while "root.clear()"
2716+
# did.
2717+
2718+
class E(ET.Element):
2719+
"""Local class to be able to mock E.__eq__ for introspection."""
2720+
2721+
class X(E):
26852722
def __eq__(self, o):
2686-
del e[:]
2687-
return False
2688-
e = ET.Element('foo')
2689-
e.extend([X('bar')])
2690-
self.assertRaises(ValueError, e.remove, ET.Element('baz'))
2723+
del root[:]
2724+
return not raises
26912725

2692-
e = ET.Element('foo')
2693-
e.extend([ET.Element('bar')])
2694-
self.assertRaises(ValueError, e.remove, X('baz'))
2726+
class Y(E):
2727+
def __eq__(self, o):
2728+
root.clear()
2729+
return not raises
2730+
2731+
if raises:
2732+
get_checker_context = lambda: self.assertRaises(ValueError)
2733+
else:
2734+
get_checker_context = nullcontext
2735+
2736+
self.assertIs(E.__eq__, object.__eq__)
2737+
2738+
for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
2739+
self.enterContext(self.subTest(side_effect=side_effect))
2740+
2741+
# test removing R() from [U()]
2742+
for R, U, description in [
2743+
(E, Z, "remove missing E() from [Z()]"),
2744+
(Z, E, "remove missing Z() from [E()]"),
2745+
(Z, Z, "remove missing Z() from [Z()]"),
2746+
]:
2747+
with self.subTest(description):
2748+
root = E('top')
2749+
root.extend([U('one')])
2750+
with get_checker_context():
2751+
root.remove(R('missing'))
2752+
2753+
# test removing R() from [U(), V()]
2754+
cases = self.cases_for_remove_missing_with_mutations(E, Z)
2755+
for R, U, V, description in cases:
2756+
with self.subTest(description):
2757+
root = E('top')
2758+
root.extend([U('one'), V('two')])
2759+
with get_checker_context():
2760+
root.remove(R('missing'))
2761+
2762+
# Test removing root[0] from [Z()].
2763+
#
2764+
# Since we call root.remove() with root[0], Z.__eq__()
2765+
# will not be called (we branch on the fast Py_EQ path).
2766+
with self.subTest("remove root[0] from [Z()]"):
2767+
root = E('top')
2768+
root.append(Z('rem'))
2769+
with equal_wrapper(E) as f, equal_wrapper(Z) as g:
2770+
root.remove(root[0])
2771+
f.assert_not_called()
2772+
g.assert_not_called()
2773+
2774+
# Test removing root[1] (of type R) from [U(), R()].
2775+
is_special = is_python_implementation() and raises and Z is Y
2776+
if is_python_implementation() and raises and Z is Y:
2777+
# In pure Python, using root.clear() sets the children
2778+
# list to [] without calling list.clear().
2779+
#
2780+
# For this reason, the call to root.remove() first
2781+
# checks root[0] and sets the children list to []
2782+
# since either root[0] or root[1] is an evil element.
2783+
#
2784+
# Since checking root[1] still uses the old reference
2785+
# to the children list, PyObject_RichCompareBool() branches
2786+
# to the fast Py_EQ path and Y.__eq__() is called exactly
2787+
# once (when checking root[0]).
2788+
continue
2789+
else:
2790+
cases = self.cases_for_remove_existing_with_mutations(E, Z)
2791+
for R, U, description in cases:
2792+
with self.subTest(description):
2793+
root = E('top')
2794+
root.extend([U('one'), R('rem')])
2795+
with get_checker_context():
2796+
root.remove(root[1])
2797+
2798+
def test_remove_with_mutate_root_assume_missing(self):
2799+
# gh-126033: Check that a concurrent mutation for an assumed-to-be
2800+
# missing element does not make the interpreter crash.
2801+
self.do_test_remove_with_mutate_root(raises=True)
2802+
2803+
def test_remove_with_mutate_root_assume_existing(self):
2804+
# gh-126033: Check that a concurrent mutation for an assumed-to-be
2805+
# existing element does not make the interpreter crash.
2806+
self.do_test_remove_with_mutate_root(raises=False)
2807+
2808+
def do_test_remove_with_mutate_root(self, *, raises):
2809+
E = ET.Element
2810+
2811+
class Z(E):
2812+
def __eq__(self, o):
2813+
del root[0]
2814+
return not raises
2815+
2816+
if raises:
2817+
get_checker_context = lambda: self.assertRaises(ValueError)
2818+
else:
2819+
get_checker_context = nullcontext
2820+
2821+
# test removing R() from [U(), V()]
2822+
cases = self.cases_for_remove_missing_with_mutations(E, Z)
2823+
for R, U, V, description in cases:
2824+
with self.subTest(description):
2825+
root = E('top')
2826+
root.extend([U('one'), V('two')])
2827+
with get_checker_context():
2828+
root.remove(R('missing'))
2829+
2830+
# test removing root[1] (of type R) from [U(), R()]
2831+
cases = self.cases_for_remove_existing_with_mutations(E, Z)
2832+
for R, U, description in cases:
2833+
with self.subTest(description):
2834+
root = E('top')
2835+
root.extend([U('one'), R('rem')])
2836+
with get_checker_context():
2837+
root.remove(root[1])
2838+
2839+
def cases_for_remove_missing_with_mutations(self, E, Z):
2840+
# Cases for removing R() from [U(), V()].
2841+
# The case U = V = R = E is not interesting as there is no mutation.
2842+
for U, V in [(E, Z), (Z, E), (Z, Z)]:
2843+
description = (f"remove missing {E.__name__}() from "
2844+
f"[{U.__name__}(), {V.__name__}()]")
2845+
yield E, U, V, description
2846+
2847+
for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
2848+
description = (f"remove missing {Z.__name__}() from "
2849+
f"[{U.__name__}(), {V.__name__}()]")
2850+
yield Z, U, V, description
2851+
2852+
def cases_for_remove_existing_with_mutations(self, E, Z):
2853+
# Cases for removing root[1] (of type R) from [U(), R()].
2854+
# The case U = R = E is not interesting as there is no mutation.
2855+
for U, R, description in [
2856+
(E, Z, "remove root[1] from [E(), Z()]"),
2857+
(Z, E, "remove root[1] from [Z(), E()]"),
2858+
(Z, Z, "remove root[1] from [Z(), Z()]"),
2859+
]:
2860+
description = (f"remove root[1] (of type {R.__name__}) "
2861+
f"from [{U.__name__}(), {R.__name__}()]")
2862+
yield R, U, description
26952863

26962864
@support.infinite_recursion(25)
26972865
def test_recursive_repr(self):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
2+
<xml.etree.ElementTree.Element.remove>` when the element is
3+
concurrently mutated. Patch by Bénédikt Tran.

Modules/_elementtree.c

+32-26
Original file line numberDiff line numberDiff line change
@@ -850,6 +850,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
850850
if (element_resize(element, self->extra->length) < 0)
851851
goto error;
852852

853+
// TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
853854
for (i = 0; i < self->extra->length; i++) {
854855
PyObject* child = deepcopy(st, self->extra->children[i], memo);
855856
if (!child || !Element_Check(st, child)) {
@@ -1629,42 +1630,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
16291630
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
16301631
{
16311632
Py_ssize_t i;
1632-
int rc;
1633-
PyObject *found;
1634-
1635-
if (!self->extra) {
1636-
/* element has no children, so raise exception */
1637-
PyErr_SetString(
1638-
PyExc_ValueError,
1639-
"list.remove(x): x not in list"
1640-
);
1641-
return NULL;
1642-
}
1643-
1644-
for (i = 0; i < self->extra->length; i++) {
1645-
if (self->extra->children[i] == subelement)
1646-
break;
1647-
rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1648-
if (rc > 0)
1633+
// When iterating over the list of children, we need to check that the
1634+
// list is not cleared (self->extra != NULL) and that we are still within
1635+
// the correct bounds (i < self->extra->length).
1636+
//
1637+
// We deliberately avoid protecting against children lists that grow
1638+
// faster than the index since list objects do not protect against it.
1639+
int rc = 0;
1640+
for (i = 0; self->extra && i < self->extra->length; i++) {
1641+
if (self->extra->children[i] == subelement) {
1642+
rc = 1;
16491643
break;
1650-
if (rc < 0)
1644+
}
1645+
PyObject *child = Py_NewRef(self->extra->children[i]);
1646+
rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
1647+
Py_DECREF(child);
1648+
if (rc < 0) {
16511649
return NULL;
1650+
}
1651+
else if (rc > 0) {
1652+
break;
1653+
}
16521654
}
16531655

1654-
if (i >= self->extra->length) {
1655-
/* subelement is not in children, so raise exception */
1656-
PyErr_SetString(
1657-
PyExc_ValueError,
1658-
"list.remove(x): x not in list"
1659-
);
1656+
if (rc == 0) {
1657+
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
16601658
return NULL;
16611659
}
16621660

1663-
found = self->extra->children[i];
1661+
// An extra check must be done if the mutation occurs at the very last
1662+
// step and removes or clears the 'extra' list (the condition on the
1663+
// length would not be satisfied any more).
1664+
if (self->extra == NULL || i >= self->extra->length) {
1665+
Py_RETURN_NONE;
1666+
}
1667+
1668+
PyObject *found = self->extra->children[i];
16641669

16651670
self->extra->length--;
1666-
for (; i < self->extra->length; i++)
1671+
for (; i < self->extra->length; i++) {
16671672
self->extra->children[i] = self->extra->children[i+1];
1673+
}
16681674

16691675
Py_DECREF(found);
16701676
Py_RETURN_NONE;

0 commit comments

Comments
 (0)