Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion betty/tests/wikipedia/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,10 @@ async def test_get_place_coordinates__should_return(
Path(__file__),
SVG,
"An Example Image",
{
"en": "An Example Image",
"nl": "Een voorbeeldafbeelding",
},
"https://example.com/description",
"example.svg",
),
Expand All @@ -502,6 +506,14 @@ async def test_get_place_coordinates__should_return(
"mime": "image/svg+xml",
"canonicaltitle": "File:An Example Image",
"descriptionurl": "https://example.com/description",
"extmetadata": {
"ImageDescription": {
"value": {
"en": "An Example Image",
"nl": "Een voorbeeldafbeelding",
},
},
},
},
],
}
Expand All @@ -526,7 +538,7 @@ async def test_get_image__should_return(
page_language = "en"
page_name = "Amsterdam & Omstreken"
page_fetch_url = "https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam%20%26%20Omstreken&prop=langlinks|pageimages|coordinates&lllimit=500&piprop=name&pilicense=free&pilimit=1&coprimary=primary&format=json&formatversion=2"
file_fetch_url = "https://en.wikipedia.org/w/api.php?action=query&prop=imageinfo&titles=File:Amsterdam%20%26%20Omstreken&iiprop=url|mime|canonicaltitle&format=json&formatversion=2"
file_fetch_url = "https://en.wikipedia.org/w/api.php?action=query&prop=imageinfo&titles=File:Amsterdam%20%26%20Omstreken&iiprop=url|mime|canonicaltitle|extmetadata&iiextmetadatamultilang=1&format=json&formatversion=2"

fetch_map = {page_fetch_url: _new_json_fetch_response(page_fetch_json)}
fetch_file_map = {}
Expand All @@ -548,3 +560,7 @@ async def test_get_image__should_return(
assert actual.title == expected.title
assert actual.wikimedia_commons_url == expected.wikimedia_commons_url
assert actual.path is image_file_path
assert actual.description == {
"en": "An Example Image",
"nl": "Een voorbeeldafbeelding",
}
1 change: 1 addition & 0 deletions betty/tests/wikipedia/test_populator.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ async def test_populate_has_links(
Path(__file__),
MediaType("application/octet-stream"),
"",
{},
"https://example.com",
"example",
)
Expand Down
17 changes: 15 additions & 2 deletions betty/wikipedia/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
from geopy import Point

from betty.fetch import Fetcher, FetchError
from betty.locale.localizable import plain
from betty.locale.localizable import (
StaticTranslations,
plain,
)
from betty.media_type import MediaType
from betty.typing import internal

Expand All @@ -25,6 +28,7 @@
from betty.concurrent import RateLimiter


@internal
@final
@dataclass(frozen=True)
class Summary:
Expand All @@ -45,6 +49,7 @@ def url(self) -> str:
return f"https://{self.locale}.wikipedia.org/wiki/{self.name}"


@internal
@final
@dataclass(frozen=True)
class Image:
Expand All @@ -55,6 +60,7 @@ class Image:
path: Path
media_type: MediaType
title: str
description: StaticTranslations
wikimedia_commons_url: str
name: str

Expand Down Expand Up @@ -175,7 +181,7 @@ async def get_image(self, page_language: str, page_name: str) -> Image | None:
if page_image_name in self._images:
return self._images[page_image_name]

url = f"https://en.wikipedia.org/w/api.php?action=query&prop=imageinfo&titles=File:{quote(page_image_name)}&iiprop=url|mime|canonicaltitle&format=json&formatversion=2"
url = f"https://en.wikipedia.org/w/api.php?action=query&prop=imageinfo&titles=File:{quote(page_image_name)}&iiprop=url|mime|canonicaltitle|extmetadata&iiextmetadatamultilang=1&format=json&formatversion=2"
image_info_api_data = await self._get_query_api_data(url)

try:
Expand All @@ -195,6 +201,13 @@ async def get_image(self, page_language: str, page_name: str) -> Image | None:
image_info["canonicaltitle"][
image_info["canonicaltitle"].index(":") + 1 :
],
{
language: description
for language, description in image_info["extmetadata"][
"ImageDescription"
]["value"].items()
if not language.startswith("_")
},
image_info["descriptionurl"],
Path(urlparse(image_info["url"]).path).name,
)
Expand Down
12 changes: 7 additions & 5 deletions betty/wikipedia/populator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

if TYPE_CHECKING:
from collections.abc import Mapping, MutableMapping, MutableSequence, Sequence
from pathlib import Path

from betty.ancestry import Ancestry
from betty.locale.localizer import LocalizerRepository
Expand All @@ -57,8 +58,8 @@ def __init__(
self._locales = locales
self._localizers = localizers
self._client = client
self._image_files: MutableMapping[Image, File] = {}
self._image_files_locks: Mapping[Image, Lock] = defaultdict(
self._image_files: MutableMapping[Path, File] = {}
self._image_files_locks: Mapping[Path, Lock] = defaultdict(
AsynchronizedLock.threading
)
self._copyright_notice = copyright_notice
Expand Down Expand Up @@ -218,9 +219,9 @@ async def _populate_has_file_references_link(
async def _image_file_reference(
self, has_file_references: HasFileReferences, image: Image
) -> FileReference:
async with self._image_files_locks[image]:
async with self._image_files_locks[image.path]:
try:
file = self._image_files[image]
file = self._image_files[image.path]
except KeyError:
links = []
for locale in self._locales:
Expand All @@ -245,8 +246,9 @@ async def _image_file_reference(
media_type=image.media_type,
links=links,
copyright_notice=self._copyright_notice,
description=image.description,
)
self._image_files[image] = file
self._image_files[image.path] = file
self._ancestry.add(file)
file_reference = FileReference(has_file_references, file)
self._ancestry.add(file_reference)
Expand Down