-
-
Notifications
You must be signed in to change notification settings - Fork 32.2k
bpo-32424: Synchronize xml.etree.ElementTree.Element copy methods. #5046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
# monkey-patched when running the "test_xml_etree_c" test suite. | ||
|
||
import copy | ||
import itertools | ||
import functools | ||
import html | ||
import io | ||
|
@@ -109,6 +110,11 @@ def newtest(*args, **kwargs): | |
return decorator | ||
|
||
|
||
#----------------------------------------------------------------------------- | ||
# UNIT TESTS | ||
#----------------------------------------------------------------------------- | ||
|
||
|
||
class ModuleTest(unittest.TestCase): | ||
def test_sanity(self): | ||
# Import sanity. | ||
|
@@ -122,6 +128,143 @@ def test_all(self): | |
support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",)) | ||
|
||
|
||
class ElementTree_Element_UnitTest(unittest.TestCase): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name of this class is not in line with other names. |
||
|
||
def test___init__(self): | ||
tag = "foo" | ||
attrib = { "zix": "wyp" } | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Too much empty lines. Remove unneeded empty lines. |
||
element_foo = ET.Element(tag, attrib) | ||
|
||
with self.subTest("traits of an element"): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not needed. It doesn't match the style of other tests. |
||
self.assertIsInstance(element_foo, ET.Element) | ||
self.assertIn("tag", dir(element_foo)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is already tested in test_interface. |
||
self.assertIn("attrib", dir(element_foo)) | ||
self.assertIn("text", dir(element_foo)) | ||
self.assertIn("tail", dir(element_foo)) | ||
|
||
with self.subTest("string attributes have expected values"): | ||
self.assertEqual(element_foo.tag, tag) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is already tested in test_set_attribute. |
||
self.assertIsNone(element_foo.text) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may be an implementation detail. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't that be definitively decided one way or the other? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually it already is tested in test_set_attribute. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not an implementation detail and there is a general difference between "no text" (i.e. None) and "empty text" (i.e. the empty string), which can be used for formatting. Agreed that we should avoid duplicating tests, though. Preferably, there should be a single "obvious" place where a feature or property is tested. |
||
self.assertIsNone(element_foo.tail) | ||
|
||
with self.subTest("attrib is a copy"): | ||
self.assertIsNot(element_foo.attrib, attrib) | ||
self.assertEqual(element_foo.attrib, attrib) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is already tested in test_attrib. |
||
|
||
with self.subTest("attrib isn't linked"): | ||
attrib["bar"] = "baz" | ||
|
||
self.assertIsNot(element_foo.attrib, attrib) | ||
self.assertNotEqual(element_foo.attrib, attrib) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is already tested in test_attrib. |
||
|
||
def test_copy(self): | ||
element_foo = ET.Element("foo", { "zix": "wyp" }) | ||
element_foo.append(ET.Element("bar", { "baz": "qix" })) | ||
|
||
with self.assertWarns(DeprecationWarning): | ||
element_foo2 = element_foo.copy() | ||
|
||
with self.subTest("elements are not the same"): | ||
self.assertIsNot(element_foo2, element_foo) | ||
|
||
with self.subTest("string attributes are the same"): | ||
self.assertIs(element_foo2.tag, element_foo.tag) | ||
self.assertIs(element_foo2.text, element_foo.text) | ||
self.assertIs(element_foo2.tail, element_foo.tail) | ||
|
||
with self.subTest("string attributes are equal"): | ||
self.assertEqual(element_foo2.tag, element_foo.tag) | ||
self.assertEqual(element_foo2.text, element_foo.text) | ||
self.assertEqual(element_foo2.tail, element_foo.tail) | ||
|
||
with self.subTest("children are the same"): | ||
for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): | ||
self.assertIs(child1, child2) | ||
|
||
with self.subTest("attrib is a copy"): | ||
self.assertIsNot(element_foo2.attrib, element_foo.attrib) | ||
self.assertEqual(element_foo2.attrib, element_foo.attrib) | ||
|
||
with self.subTest("attrib isn't linked"): | ||
element_foo.attrib["bar"] = "baz" | ||
|
||
self.assertIsNot(element_foo2.attrib, element_foo.attrib) | ||
self.assertNotEqual(element_foo2.attrib, element_foo.attrib) | ||
|
||
def test___copy__(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is an existing test for copying: test_copy. |
||
element_foo = ET.Element("foo", { "zix": "wyp" }) | ||
element_foo.append(ET.Element("bar", { "baz": "qix" })) | ||
|
||
element_foo2 = copy.copy(element_foo) | ||
|
||
with self.subTest("elements are not the same"): | ||
self.assertIsNot(element_foo2, element_foo) | ||
|
||
with self.subTest("string attributes are the same"): | ||
self.assertIs(element_foo2.tag, element_foo.tag) | ||
self.assertIs(element_foo2.text, element_foo.text) | ||
self.assertIs(element_foo2.tail, element_foo.tail) | ||
|
||
with self.subTest("string attributes are equal"): | ||
self.assertEqual(element_foo2.tag, element_foo.tag) | ||
self.assertEqual(element_foo2.text, element_foo.text) | ||
self.assertEqual(element_foo2.tail, element_foo.tail) | ||
|
||
with self.subTest("children are the same"): | ||
for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): | ||
self.assertIs(child1, child2) | ||
|
||
with self.subTest("attrib is a copy"): | ||
self.assertIsNot(element_foo2.attrib, element_foo.attrib) | ||
self.assertEqual(element_foo2.attrib, element_foo.attrib) | ||
|
||
with self.subTest("attrib isn't linked"): | ||
element_foo.attrib["bar"] = "baz" | ||
|
||
self.assertIsNot(element_foo2.attrib, element_foo.attrib) | ||
self.assertNotEqual(element_foo2.attrib, element_foo.attrib) | ||
|
||
def test___deepcopy__(self): | ||
element_foo = ET.Element("foo", { "zix": "wyp" }) | ||
element_foo.append(ET.Element("bar", { "baz": "qix" })) | ||
|
||
element_foo2 = copy.deepcopy(element_foo) | ||
|
||
with self.subTest("elements are not the same"): | ||
self.assertIsNot(element_foo2, element_foo) | ||
|
||
# Strings should still be the same in a deep copy. | ||
with self.subTest("string attributes are the same"): | ||
self.assertIs(element_foo2.tag, element_foo.tag) | ||
self.assertIs(element_foo2.text, element_foo.text) | ||
self.assertIs(element_foo2.tail, element_foo.tail) | ||
|
||
with self.subTest("string attributes are equal"): | ||
self.assertEqual(element_foo2.tag, element_foo.tag) | ||
self.assertEqual(element_foo2.text, element_foo.text) | ||
self.assertEqual(element_foo2.tail, element_foo.tail) | ||
|
||
with self.subTest("children are not the same"): | ||
for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): | ||
self.assertIsNot(child1, child2) | ||
|
||
with self.subTest("attrib is a copy"): | ||
self.assertIsNot(element_foo2.attrib, element_foo.attrib) | ||
self.assertEqual(element_foo2.attrib, element_foo.attrib) | ||
|
||
with self.subTest("attrib isn't linked"): | ||
element_foo.attrib["bar"] = "baz" | ||
|
||
self.assertIsNot(element_foo2.attrib, element_foo.attrib) | ||
self.assertNotEqual(element_foo2.attrib, element_foo.attrib) | ||
|
||
|
||
#----------------------------------------------------------------------------- | ||
# INTEGRATION TESTS | ||
#----------------------------------------------------------------------------- | ||
|
||
|
||
def serialize(elem, to_string=True, encoding='unicode', **options): | ||
if encoding != 'unicode': | ||
file = io.BytesIO() | ||
|
@@ -1961,36 +2104,38 @@ def test_cyclic_gc(self): | |
class Dummy: | ||
pass | ||
|
||
# Test the shortest cycle: d->element->d | ||
d = Dummy() | ||
d.dummyref = ET.Element('joe', attr=d) | ||
wref = weakref.ref(d) | ||
del d | ||
gc_collect() | ||
self.assertIsNone(wref()) | ||
|
||
# A longer cycle: d->e->e2->d | ||
e = ET.Element('joe') | ||
d = Dummy() | ||
d.dummyref = e | ||
wref = weakref.ref(d) | ||
e2 = ET.SubElement(e, 'foo', attr=d) | ||
del d, e, e2 | ||
gc_collect() | ||
self.assertIsNone(wref()) | ||
|
||
# A cycle between Element objects as children of one another | ||
# e1->e2->e3->e1 | ||
e1 = ET.Element('e1') | ||
e2 = ET.Element('e2') | ||
e3 = ET.Element('e3') | ||
e1.append(e2) | ||
e2.append(e2) | ||
e3.append(e1) | ||
wref = weakref.ref(e1) | ||
del e1, e2, e3 | ||
gc_collect() | ||
self.assertIsNone(wref()) | ||
with self.subTest("Test the shortest cycle: d->element->d"): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unrelated changes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These changes helped me to debug the changes I made to the C implementation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then please remove them after finishing debugging. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My point was that they were not wholly unrelated. What is the harm in including them here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with @GPHemsley that it's better to have subtests than multiple tests in a single test function. Although having multiple test functions also wouldn't be all too bad. :) |
||
d = Dummy() | ||
d.dummyref = ET.Element('joe', attr=d) | ||
wref = weakref.ref(d) | ||
del d | ||
gc_collect() | ||
self.assertIsNone(wref()) | ||
|
||
with self.subTest("A longer cycle: d->e->e2->d"): | ||
e = ET.Element('joe') | ||
d = Dummy() | ||
d.dummyref = e | ||
wref = weakref.ref(d) | ||
e2 = ET.SubElement(e, 'foo', attr=d) | ||
del d, e, e2 | ||
gc_collect() | ||
self.assertIsNone(wref()) | ||
|
||
with self.subTest( | ||
"A cycle between Element objects as children of one another: " | ||
"e1->e2->e3->e1" | ||
): | ||
e1 = ET.Element('e1') | ||
e2 = ET.Element('e2') | ||
e3 = ET.Element('e3') | ||
e3.append(e1) | ||
e2.append(e3) | ||
e1.append(e2) | ||
wref = weakref.ref(e1) | ||
del e1, e2, e3 | ||
gc_collect() | ||
self.assertIsNone(wref()) | ||
|
||
def test_weakref(self): | ||
flag = False | ||
|
@@ -3302,6 +3447,7 @@ def test_main(module=None): | |
|
||
test_classes = [ | ||
ModuleTest, | ||
ElementTree_Element_UnitTest, | ||
ElementSlicingTest, | ||
BasicElementTest, | ||
BadElementTest, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,6 +94,7 @@ | |
import sys | ||
import re | ||
import warnings | ||
import copy | ||
import io | ||
import collections | ||
import collections.abc | ||
|
@@ -194,12 +195,35 @@ def copy(self): | |
original tree. | ||
|
||
""" | ||
warnings.warn( | ||
"elem.copy() is deprecated. Use copy.copy(elem) instead.", | ||
DeprecationWarning | ||
) | ||
return self.__copy__() | ||
|
||
def __copy__(self): | ||
elem = self.makeelement(self.tag, self.attrib) | ||
elem.text = self.text | ||
elem.tail = self.tail | ||
elem[:] = self | ||
return elem | ||
|
||
def __deepcopy__(self, memo): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method is not needed in Python implementation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer you have this discussion on the bpo ticket, as PEP 399 suggests otherwise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't contradict PEP 399. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The intention of PEP-399 is that both implementations behave the same. It's not that they use exactly the same mechanisms to reach that goal. Some things work in Python that don't easily work in C, and vice-versa. That's why there are two implementations in the first place. If we can avoid adding Python (or C) code without sacrificing that goal, then that is a good thing. Code that does not exist is entirely free of bugs, for example. :) |
||
tag = copy.deepcopy(self.tag, memo) | ||
attrib = copy.deepcopy(self.attrib, memo) | ||
|
||
elem = self.makeelement(tag, attrib) | ||
|
||
elem.text = copy.deepcopy(self.text, memo) | ||
elem.tail = copy.deepcopy(self.tail, memo) | ||
|
||
for child in self: | ||
elem.append(copy.deepcopy(child, memo)) | ||
|
||
memo[id(self)] = elem | ||
|
||
return elem | ||
|
||
def __len__(self): | ||
return len(self._children) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -295,10 +295,12 @@ create_new_element(PyObject* tag, PyObject* attrib) | |
PyObject_GC_Track(self); | ||
|
||
if (attrib != Py_None && !is_empty_dict(attrib)) { | ||
attrib = PyDict_Copy(attrib); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why this change has been added? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because the results of the tests in this commit uncovered a discrepancy between the C and Python implementations whereby the attrib list is not copied in the C version beyond the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In many case this makes a redundant copying since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be moved out of this function into the caller(s) that need it? |
||
if (create_extra(self, attrib) < 0) { | ||
Py_DECREF(self); | ||
return NULL; | ||
} | ||
Py_DECREF(attrib); | ||
} | ||
|
||
return (PyObject*) self; | ||
|
@@ -759,6 +761,24 @@ _elementtree_Element___copy___impl(ElementObject *self) | |
return (PyObject*) element; | ||
} | ||
|
||
/*[clinic input] | ||
_elementtree.Element.copy | ||
|
||
[clinic start generated code]*/ | ||
|
||
static PyObject * | ||
_elementtree_Element_copy_impl(ElementObject *self) | ||
/*[clinic end generated code: output=84660e8524276b22 input=1f8134305a7719a3]*/ | ||
{ | ||
if (PyErr_WarnEx(PyExc_DeprecationWarning, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm personally ok with deprecating this right away (lxml never provided such a |
||
"elem.copy() is deprecated. Use copy.copy(elem) instead.", | ||
1) < 0) { | ||
return NULL; | ||
} | ||
|
||
return _elementtree_Element___copy___impl(self); | ||
} | ||
|
||
/* Helper for a deep copy. */ | ||
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *); | ||
|
||
|
@@ -3801,6 +3821,7 @@ static PyMethodDef element_methods[] = { | |
|
||
_ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF | ||
|
||
_ELEMENTTREE_ELEMENT_COPY_METHODDEF | ||
_ELEMENTTREE_ELEMENT___COPY___METHODDEF | ||
_ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF | ||
_ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment is redundant.