Skip to content

bpo-32424: Synchronize xml.etree.ElementTree.Element copy methods. #5046

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 176 additions & 30 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# monkey-patched when running the "test_xml_etree_c" test suite.

import copy
import itertools
import functools
import html
import io
Expand Down Expand Up @@ -109,6 +110,11 @@ def newtest(*args, **kwargs):
return decorator


#-----------------------------------------------------------------------------
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is redundant.

# UNIT TESTS
#-----------------------------------------------------------------------------


class ModuleTest(unittest.TestCase):
def test_sanity(self):
# Import sanity.
Expand All @@ -122,6 +128,143 @@ def test_all(self):
support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",))


class ElementTree_Element_UnitTest(unittest.TestCase):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name of this class is not in line with other names.


def test___init__(self):
tag = "foo"
attrib = { "zix": "wyp" }

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too much empty lines. Remove unneeded empty lines.

element_foo = ET.Element(tag, attrib)

with self.subTest("traits of an element"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed. It doesn't match the style of other tests.

self.assertIsInstance(element_foo, ET.Element)
self.assertIn("tag", dir(element_foo))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already tested in test_interface.

self.assertIn("attrib", dir(element_foo))
self.assertIn("text", dir(element_foo))
self.assertIn("tail", dir(element_foo))

with self.subTest("string attributes have expected values"):
self.assertEqual(element_foo.tag, tag)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already tested in test_set_attribute.

self.assertIsNone(element_foo.text)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be an implementation detail.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't that be definitively decided one way or the other?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it already is tested in test_set_attribute.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not an implementation detail and there is a general difference between "no text" (i.e. None) and "empty text" (i.e. the empty string), which can be used for formatting.

Agreed that we should avoid duplicating tests, though. Preferably, there should be a single "obvious" place where a feature or property is tested.

self.assertIsNone(element_foo.tail)

with self.subTest("attrib is a copy"):
self.assertIsNot(element_foo.attrib, attrib)
self.assertEqual(element_foo.attrib, attrib)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already tested in test_attrib.


with self.subTest("attrib isn't linked"):
attrib["bar"] = "baz"

self.assertIsNot(element_foo.attrib, attrib)
self.assertNotEqual(element_foo.attrib, attrib)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already tested in test_attrib.


def test_copy(self):
element_foo = ET.Element("foo", { "zix": "wyp" })
element_foo.append(ET.Element("bar", { "baz": "qix" }))

with self.assertWarns(DeprecationWarning):
element_foo2 = element_foo.copy()

with self.subTest("elements are not the same"):
self.assertIsNot(element_foo2, element_foo)

with self.subTest("string attributes are the same"):
self.assertIs(element_foo2.tag, element_foo.tag)
self.assertIs(element_foo2.text, element_foo.text)
self.assertIs(element_foo2.tail, element_foo.tail)

with self.subTest("string attributes are equal"):
self.assertEqual(element_foo2.tag, element_foo.tag)
self.assertEqual(element_foo2.text, element_foo.text)
self.assertEqual(element_foo2.tail, element_foo.tail)

with self.subTest("children are the same"):
for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
self.assertIs(child1, child2)

with self.subTest("attrib is a copy"):
self.assertIsNot(element_foo2.attrib, element_foo.attrib)
self.assertEqual(element_foo2.attrib, element_foo.attrib)

with self.subTest("attrib isn't linked"):
element_foo.attrib["bar"] = "baz"

self.assertIsNot(element_foo2.attrib, element_foo.attrib)
self.assertNotEqual(element_foo2.attrib, element_foo.attrib)

def test___copy__(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an existing test for copying: test_copy.

element_foo = ET.Element("foo", { "zix": "wyp" })
element_foo.append(ET.Element("bar", { "baz": "qix" }))

element_foo2 = copy.copy(element_foo)

with self.subTest("elements are not the same"):
self.assertIsNot(element_foo2, element_foo)

with self.subTest("string attributes are the same"):
self.assertIs(element_foo2.tag, element_foo.tag)
self.assertIs(element_foo2.text, element_foo.text)
self.assertIs(element_foo2.tail, element_foo.tail)

with self.subTest("string attributes are equal"):
self.assertEqual(element_foo2.tag, element_foo.tag)
self.assertEqual(element_foo2.text, element_foo.text)
self.assertEqual(element_foo2.tail, element_foo.tail)

with self.subTest("children are the same"):
for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
self.assertIs(child1, child2)

with self.subTest("attrib is a copy"):
self.assertIsNot(element_foo2.attrib, element_foo.attrib)
self.assertEqual(element_foo2.attrib, element_foo.attrib)

with self.subTest("attrib isn't linked"):
element_foo.attrib["bar"] = "baz"

self.assertIsNot(element_foo2.attrib, element_foo.attrib)
self.assertNotEqual(element_foo2.attrib, element_foo.attrib)

def test___deepcopy__(self):
element_foo = ET.Element("foo", { "zix": "wyp" })
element_foo.append(ET.Element("bar", { "baz": "qix" }))

element_foo2 = copy.deepcopy(element_foo)

with self.subTest("elements are not the same"):
self.assertIsNot(element_foo2, element_foo)

# Strings should still be the same in a deep copy.
with self.subTest("string attributes are the same"):
self.assertIs(element_foo2.tag, element_foo.tag)
self.assertIs(element_foo2.text, element_foo.text)
self.assertIs(element_foo2.tail, element_foo.tail)

with self.subTest("string attributes are equal"):
self.assertEqual(element_foo2.tag, element_foo.tag)
self.assertEqual(element_foo2.text, element_foo.text)
self.assertEqual(element_foo2.tail, element_foo.tail)

with self.subTest("children are not the same"):
for (child1, child2) in itertools.zip_longest(element_foo, element_foo2):
self.assertIsNot(child1, child2)

with self.subTest("attrib is a copy"):
self.assertIsNot(element_foo2.attrib, element_foo.attrib)
self.assertEqual(element_foo2.attrib, element_foo.attrib)

with self.subTest("attrib isn't linked"):
element_foo.attrib["bar"] = "baz"

self.assertIsNot(element_foo2.attrib, element_foo.attrib)
self.assertNotEqual(element_foo2.attrib, element_foo.attrib)


#-----------------------------------------------------------------------------
# INTEGRATION TESTS
#-----------------------------------------------------------------------------


def serialize(elem, to_string=True, encoding='unicode', **options):
if encoding != 'unicode':
file = io.BytesIO()
Expand Down Expand Up @@ -1961,36 +2104,38 @@ def test_cyclic_gc(self):
class Dummy:
pass

# Test the shortest cycle: d->element->d
d = Dummy()
d.dummyref = ET.Element('joe', attr=d)
wref = weakref.ref(d)
del d
gc_collect()
self.assertIsNone(wref())

# A longer cycle: d->e->e2->d
e = ET.Element('joe')
d = Dummy()
d.dummyref = e
wref = weakref.ref(d)
e2 = ET.SubElement(e, 'foo', attr=d)
del d, e, e2
gc_collect()
self.assertIsNone(wref())

# A cycle between Element objects as children of one another
# e1->e2->e3->e1
e1 = ET.Element('e1')
e2 = ET.Element('e2')
e3 = ET.Element('e3')
e1.append(e2)
e2.append(e2)
e3.append(e1)
wref = weakref.ref(e1)
del e1, e2, e3
gc_collect()
self.assertIsNone(wref())
with self.subTest("Test the shortest cycle: d->element->d"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated changes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes helped me to debug the changes I made to the C implementation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then please remove them after finishing debugging.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point was that they were not wholly unrelated. What is the harm in including them here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @GPHemsley that it's better to have subtests than multiple tests in a single test function. Although having multiple test functions also wouldn't be all too bad. :)
Yes, they are unrelated to this specific ticket, but they don't really seem big enough to merit their own ticket and PR. They somewhat match the general refactoring of lengthy tests into subtests that this change does in other places. It's at least an edge case.

d = Dummy()
d.dummyref = ET.Element('joe', attr=d)
wref = weakref.ref(d)
del d
gc_collect()
self.assertIsNone(wref())

with self.subTest("A longer cycle: d->e->e2->d"):
e = ET.Element('joe')
d = Dummy()
d.dummyref = e
wref = weakref.ref(d)
e2 = ET.SubElement(e, 'foo', attr=d)
del d, e, e2
gc_collect()
self.assertIsNone(wref())

with self.subTest(
"A cycle between Element objects as children of one another: "
"e1->e2->e3->e1"
):
e1 = ET.Element('e1')
e2 = ET.Element('e2')
e3 = ET.Element('e3')
e3.append(e1)
e2.append(e3)
e1.append(e2)
wref = weakref.ref(e1)
del e1, e2, e3
gc_collect()
self.assertIsNone(wref())

def test_weakref(self):
flag = False
Expand Down Expand Up @@ -3302,6 +3447,7 @@ def test_main(module=None):

test_classes = [
ModuleTest,
ElementTree_Element_UnitTest,
ElementSlicingTest,
BasicElementTest,
BadElementTest,
Expand Down
24 changes: 24 additions & 0 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
import sys
import re
import warnings
import copy
import io
import collections
import collections.abc
Expand Down Expand Up @@ -194,12 +195,35 @@ def copy(self):
original tree.

"""
warnings.warn(
"elem.copy() is deprecated. Use copy.copy(elem) instead.",
DeprecationWarning
)
return self.__copy__()

def __copy__(self):
elem = self.makeelement(self.tag, self.attrib)
elem.text = self.text
elem.tail = self.tail
elem[:] = self
return elem

def __deepcopy__(self, memo):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is not needed in Python implementation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer you have this discussion on the bpo ticket, as PEP 399 suggests otherwise.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't contradict PEP 399.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intention of PEP-399 is that both implementations behave the same. It's not that they use exactly the same mechanisms to reach that goal. Some things work in Python that don't easily work in C, and vice-versa. That's why there are two implementations in the first place.

If we can avoid adding Python (or C) code without sacrificing that goal, then that is a good thing. Code that does not exist is entirely free of bugs, for example. :)

tag = copy.deepcopy(self.tag, memo)
attrib = copy.deepcopy(self.attrib, memo)

elem = self.makeelement(tag, attrib)

elem.text = copy.deepcopy(self.text, memo)
elem.tail = copy.deepcopy(self.tail, memo)

for child in self:
elem.append(copy.deepcopy(child, memo))

memo[id(self)] = elem

return elem

def __len__(self):
return len(self._children)

Expand Down
21 changes: 21 additions & 0 deletions Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,12 @@ create_new_element(PyObject* tag, PyObject* attrib)
PyObject_GC_Track(self);

if (attrib != Py_None && !is_empty_dict(attrib)) {
attrib = PyDict_Copy(attrib);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change has been added?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the results of the tests in this commit uncovered a discrepancy between the C and Python implementations whereby the attrib list is not copied in the C version beyond the __init__ function. (The Python implementation always uses __init__ whenever it creates a new element, but the C implementation uses create_new_element in all locations except the original __init__ function.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In many case this makes a redundant copying since create_new_element is called after copying a dict.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be moved out of this function into the caller(s) that need it?

if (create_extra(self, attrib) < 0) {
Py_DECREF(self);
return NULL;
}
Py_DECREF(attrib);
}

return (PyObject*) self;
Expand Down Expand Up @@ -759,6 +761,24 @@ _elementtree_Element___copy___impl(ElementObject *self)
return (PyObject*) element;
}

/*[clinic input]
_elementtree.Element.copy

[clinic start generated code]*/

static PyObject *
_elementtree_Element_copy_impl(ElementObject *self)
/*[clinic end generated code: output=84660e8524276b22 input=1f8134305a7719a3]*/
{
if (PyErr_WarnEx(PyExc_DeprecationWarning,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm personally ok with deprecating this right away (lxml never provided such a .copy() method, AFAICT), but still, the usual process would be to issue a PendingDeprecationWarning first.

"elem.copy() is deprecated. Use copy.copy(elem) instead.",
1) < 0) {
return NULL;
}

return _elementtree_Element___copy___impl(self);
}

/* Helper for a deep copy. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);

Expand Down Expand Up @@ -3801,6 +3821,7 @@ static PyMethodDef element_methods[] = {

_ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF

_ELEMENTTREE_ELEMENT_COPY_METHODDEF
_ELEMENTTREE_ELEMENT___COPY___METHODDEF
_ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
_ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
Expand Down
19 changes: 18 additions & 1 deletion Modules/clinic/_elementtree.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.