Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion elasticsearch/dsl/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,11 @@ def _serialize(
if isinstance(data, collections.abc.Mapping):
return data

return data.to_dict(skip_empty=skip_empty)
try:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just isinstance(data, AttrDict)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this would also include support for other types that are not AttrDict. And because it avoids evaluating the conditional for every field that is saved, so it should perform a bit better.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this would also include support for other types that are not AttrDict.

Which would probably lead to more crashes anyway?

And because it avoids evaluating the conditional for every field that is saved, so it should perform a bit better.

Handling an exception costs many times what an isinstance check costs. But do a few nanoseconds of performance even matter here? 🤔 It just seems like an unnecessarily "complex" control flow that hides a very simple intent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exception would be handled only for the fields that have a wrong type. The conditional would have to be evaluated for every single field.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And to my point, does it even matter in this case? Elasticsearch-dsl serialization/deserialization code is clearly not designed with performance in mind, so I don't really see why this one isinstance is seen as a potential issue at the cost of readability. But that's just my 2 cents. (and my last comment on this :) )

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, fwiw, I suppose your method is consistent with Field._safe_serialize

return data.to_dict(skip_empty=skip_empty)
except TypeError:
# this would only happen if an AttrDict was given instead of an InnerDoc
return data.to_dict()

def clean(self, data: Any) -> Any:
data = super().clean(data)
Expand Down
36 changes: 35 additions & 1 deletion test_elasticsearch/test_dsl/test_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@
from dateutil import tz

from elasticsearch import dsl
from elasticsearch.dsl import InnerDoc, Range, ValidationException, field
from elasticsearch.dsl import (
AttrDict,
AttrList,
InnerDoc,
Range,
ValidationException,
field,
)


def test_date_range_deserialization() -> None:
Expand Down Expand Up @@ -235,6 +242,33 @@ class Inner(InnerDoc):
field.Object(doc_class=Inner, dynamic=False)


def test_dynamic_object() -> None:
    """Check a dynamic ``Object`` field with plain dict and ``AttrDict`` input.

    Both input kinds must deserialize to something whose ``to_dict()`` equals
    the raw mapping, and must serialize back to that same mapping.
    """
    expected = {"a": "b"}
    obj_field = field.Object(dynamic=True)

    # Deserialization: the result exposes ``to_dict()`` for either input kind.
    assert obj_field.deserialize({"a": "b"}).to_dict() == expected
    assert obj_field.deserialize(AttrDict({"a": "b"})).to_dict() == expected

    # Serialization: either input kind yields the plain mapping.
    assert obj_field.serialize({"a": "b"}) == expected
    assert obj_field.serialize(AttrDict({"a": "b"})) == expected


def test_dynamic_nested() -> None:
    """Check a dynamic ``Nested`` field with list, mixed and ``AttrList`` input.

    Each input shape (a plain list of dicts, a list mixing ``AttrDict`` and
    dict, and an ``AttrList`` of the same) must compare equal to the plain
    list of dicts after both deserialization and serialization.
    """
    expected = [{"a": "b"}, {"c": "d"}]
    nested_field = field.Nested(dynamic=True)

    # Deserialization of the three supported input shapes.
    assert nested_field.deserialize([{"a": "b"}, {"c": "d"}]) == expected
    assert nested_field.deserialize([AttrDict({"a": "b"}), {"c": "d"}]) == expected
    assert (
        nested_field.deserialize(AttrList([AttrDict({"a": "b"}), {"c": "d"}]))
        == expected
    )

    # Serialization of the same three input shapes.
    assert nested_field.serialize([{"a": "b"}, {"c": "d"}]) == expected
    assert nested_field.serialize([AttrDict({"a": "b"}), {"c": "d"}]) == expected
    assert (
        nested_field.serialize(AttrList([AttrDict({"a": "b"}), {"c": "d"}]))
        == expected
    )


def test_all_fields_exported() -> None:
"""Make sure that all the generated field classes are exported at the top-level"""
fields = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from elasticsearch.dsl import (
AsyncDocument,
AsyncSearch,
AttrDict,
Binary,
Boolean,
Date,
Expand Down Expand Up @@ -627,13 +628,17 @@ async def test_can_save_to_different_index(


@pytest.mark.asyncio
@pytest.mark.parametrize("validate", (True, False))
async def test_save_without_skip_empty_will_include_empty_fields(
async_write_client: AsyncElasticsearch,
validate: bool,
) -> None:
test_repo = Repository(
field_1=[], field_2=None, field_3={}, owner={"name": None}, meta={"id": 42}
)
assert await test_repo.save(index="test-document", skip_empty=False)
assert await test_repo.save(
index="test-document", skip_empty=False, validate=validate
)

assert_doc_equals(
{
Expand All @@ -650,6 +655,23 @@ async def test_save_without_skip_empty_will_include_empty_fields(
await async_write_client.get(index="test-document", id=42),
)

test_repo = Repository(owner=AttrDict({"name": None}), meta={"id": 43})
assert await test_repo.save(
index="test-document", skip_empty=False, validate=validate
)

assert_doc_equals(
{
"found": True,
"_index": "test-document",
"_id": "43",
"_source": {
"owner": {"name": None},
},
},
await async_write_client.get(index="test-document", id=43),
)


@pytest.mark.asyncio
async def test_delete(async_write_client: AsyncElasticsearch) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from elasticsearch import ConflictError, Elasticsearch, NotFoundError
from elasticsearch.dsl import (
AttrDict,
Binary,
Boolean,
Date,
Expand Down Expand Up @@ -621,13 +622,15 @@ def test_can_save_to_different_index(


@pytest.mark.sync
@pytest.mark.parametrize("validate", (True, False))
def test_save_without_skip_empty_will_include_empty_fields(
write_client: Elasticsearch,
validate: bool,
) -> None:
test_repo = Repository(
field_1=[], field_2=None, field_3={}, owner={"name": None}, meta={"id": 42}
)
assert test_repo.save(index="test-document", skip_empty=False)
assert test_repo.save(index="test-document", skip_empty=False, validate=validate)

assert_doc_equals(
{
Expand All @@ -644,6 +647,21 @@ def test_save_without_skip_empty_will_include_empty_fields(
write_client.get(index="test-document", id=42),
)

test_repo = Repository(owner=AttrDict({"name": None}), meta={"id": 43})
assert test_repo.save(index="test-document", skip_empty=False, validate=validate)

assert_doc_equals(
{
"found": True,
"_index": "test-document",
"_id": "43",
"_source": {
"owner": {"name": None},
},
},
write_client.get(index="test-document", id=43),
)


@pytest.mark.sync
def test_delete(write_client: Elasticsearch) -> None:
Expand Down
6 changes: 5 additions & 1 deletion utils/templates/field.py.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,11 @@ class {{ k.name }}({{ k.parent }}):
if isinstance(data, collections.abc.Mapping):
return data

return data.to_dict(skip_empty=skip_empty)
try:
return data.to_dict(skip_empty=skip_empty)
except TypeError:
# this would only happen if an AttrDict was given instead of an InnerDoc
return data.to_dict()

def clean(self, data: Any) -> Any:
data = super().clean(data)
Expand Down
Loading