diff --git a/linkml_model/linkml_files.py b/linkml_model/linkml_files.py
index 48d11cc6..bce1ca5b 100644
--- a/linkml_model/linkml_files.py
+++ b/linkml_model/linkml_files.py
@@ -1,6 +1,8 @@
-import os
+from pathlib import Path
 from enum import Enum, auto
-from typing import Optional, Union
+from typing import Dict, Optional, Union, Tuple, NamedTuple
+from urllib.parse import urljoin
+from dataclasses import dataclass
 
 import requests
 from rdflib import Namespace
@@ -9,14 +11,12 @@
 LINKML_NAMESPACE = Namespace(LINKML_URL_BASE)
 GITHUB_IO_BASE = "https://linkml.github.io/linkml-model/"
 GITHUB_BASE = "https://raw.githubusercontent.com/linkml/linkml-model/"
-LOCAL_BASE = os.path.abspath(os.path.dirname(__file__))
+LOCAL_BASE = Path(__file__).parent.resolve()
 GITHUB_API_BASE = "https://api.github.com/repos/linkml/linkml-model/"
 GITHUB_RELEASES = GITHUB_BASE + "releases"
 GITHUB_TAGS = GITHUB_BASE + "tags"
 
 
-
-
 class _AutoName(Enum):
     @staticmethod
     def _generate_next_value_(name, start, count, last_values):
@@ -32,42 +32,84 @@ class Source(_AutoName):
     EXTENSIONS = auto()
 
 
-class Format(Enum):
+class Format(_AutoName):
     """ LinkML package formats """
-    GRAPHQL = "graphql"
-    HTML = ""
-    JSON = "json"
-    JSONLD = "context.jsonld"
-    JSON_SCHEMA = "schema.json"
-    NATIVE_JSONLD = "model.context.jsonld"
-    NATIVE_RDF = "model.ttl"
-    NATIVE_SHEXC = "model.shex"
-    NATIVE_SHEXJ = "model.shexj"
-    OWL = "owl.ttl"
-    PYTHON = "py"
-    RDF = "ttl"
-    SHEXC = "shex"
-    SHEXJ = "shexj"
-    YAML = "yaml"
-
-
-class _Path(Enum):
+    EXCEL = auto()
+    GRAPHQL = auto()
+    JSON = auto()
+    JSONLD = auto()
+    JSON_SCHEMA = auto()
+    NATIVE_JSONLD = auto()
+    NATIVE_RDF = auto()
+    NATIVE_SHEXC = auto()
+    NATIVE_SHEXJ = auto()
+    OWL = auto()
+    PREFIXMAP = auto()
+    PROTOBUF = auto()
+    PYTHON = auto()
+    RDF = auto()
+    SHACL = auto()
+    SHEXC = auto()
+    SHEXJ = auto()
+    SQLDDL = auto()
+    SQLSCHEMA = auto()
+    YAML = auto()
+
+@dataclass
+class FormatPath:
+    path: str
+    extension: str
+
+    def model_path(self, model:str) -> Path:
+        return Path(self.path) / f"{model}.{self.extension}"  # extensions have no leading dot, which with_suffix() rejects
+
+class _Path:
     """ LinkML Relative paths"""
-    GRAPHQL = "graphql"
-    HTML = "docs"
-    JSON = "json"
-    JSONLD = "jsonld"
-    JSON_SCHEMA = "jsonschema"
-    NATIVE_JSONLD = "jsonld"
-    NATIVE_RDF = "ttl"
-    NATIVE_SHEXC = "shex"
-    NATIVE_SHEXJ = "shex"
-    OWL = "owl"
-    PYTHON = "linkml_model"
-    RDF = "rdf"
-    SHEXC = "shex"
-    SHEXJ = "shex"
-    YAML = "model/schema"
+    EXCEL = FormatPath("excel", "xlsx")
+    GRAPHQL = FormatPath("graphql", "graphql")
+    JSON = FormatPath("json", "json")
+    JSONLD = FormatPath("jsonld", "context.jsonld")
+    JSON_SCHEMA = FormatPath("jsonschema", "schema.json")
+    NATIVE_JSONLD = FormatPath("jsonld", "context.jsonld")
+    NATIVE_RDF = FormatPath("rdf", "ttl")
+    NATIVE_SHEXC = FormatPath("shex", "shex")
+    NATIVE_SHEXJ = FormatPath("shex", "shexj")
+    OWL = FormatPath("owl", "owl.ttl")
+    PREFIXMAP = FormatPath('prefixmap', 'yaml')
+    PROTOBUF = FormatPath("protobuf", "proto")
+    PYTHON = FormatPath("", "py")
+    RDF = FormatPath("rdf", "ttl")
+    SHACL = FormatPath("shacl", "shacl.ttl")
+    SHEXC = FormatPath("shex", "shex")
+    SHEXJ = FormatPath("shex", "shexj")
+    SQLDDL = FormatPath("sqlddl", "sql")
+    SQLSCHEMA = FormatPath("sqlschema", "sql")
+    YAML = FormatPath(str(Path("model") / "schema"), "yaml")
+
+    @classmethod
+    def items(cls) -> Dict[str, FormatPath]:
+        return {k: v for k, v in cls.__dict__.items() if isinstance(v, FormatPath)}  # excludes the classmethods
+
+    @classmethod
+    def get(cls, item:Union[str,Format]) -> FormatPath:
+        if isinstance(item, Format):
+            item = item.name.upper()
+        return getattr(cls, item)
+
+    def __class_getitem__(cls, item:str) -> FormatPath:
+        return getattr(cls, item)
+
+
+META_ONLY = (
+    Format.EXCEL,
+    Format.GRAPHQL,
+    Format.OWL,
+    Format.PREFIXMAP,
+    Format.PROTOBUF,
+    Format.SHACL,
+    Format.SQLDDL,
+    Format.SQLSCHEMA
+)
 
 
 class ReleaseTag(_AutoName):
@@ -78,26 +120,40 @@ class ReleaseTag(_AutoName):
     CURRENT = auto()
 
 
-def _build_path(source: Source, fmt: Format) -> str:
-    """ Create the relative path for source and fmt """
-    return f"{_Path[fmt.name].value}/{source.value}.{fmt.value}"
+class PathParts(NamedTuple):
+    format: str
+    file: str
+
+
+def _build_path(source: Source, fmt: Format) -> PathParts:
+    """
+    Create the parts for a relative path for source and fmt.
+    Combined elsewhere into a complete path, since OS paths and URLs differ.
+    """
+    fmt_path: FormatPath = _Path.get(fmt.name)
+    return PathParts(fmt_path.path, f"{source.value}.{fmt_path.extension}")
 
 
 def _build_loc(base: str, source: Source, fmt: Format) -> str:
-    return f"{base}{_build_path(source, fmt)}".replace('blob/', '')
+    """A GitHub location: base joined with the relative path for source and fmt"""
+    # urls are always forward slash separated; empty parts (PYTHON has no directory) are skipped so urljoin never sees a root-relative '/...'
+    path = '/'.join(part for part in _build_path(source, fmt) if part)
+    return urljoin(base, path).replace('blob/', '')
 
 
 def URL_FOR(source: Source, fmt: Format) -> str:
     """ Return the URL to retrieve source in format """
-    return f"{LINKML_URL_BASE}{source.value}.{fmt.value}"
+    fmt_path: FormatPath = _Path.get(fmt.name)
+    return f"{LINKML_URL_BASE}{source.value}.{fmt_path.extension}"
 
 
 def LOCAL_PATH_FOR(source: Source, fmt: Format) -> str:
-    return os.path.join(LOCAL_BASE, _build_path(source, fmt))
+    return str(LOCAL_BASE.joinpath(*_build_path(source, fmt)))
 
 
-def GITHUB_IO_PATH_FOR(source: Source, fmt: Format) -> str:
-    return _build_loc(GITHUB_IO_BASE, source, fmt)
+def GITHUB_IO_PATH_FOR(source: Source, fmt: Format, version="latest") -> str:
+    path = '/'.join(p for p in (version, 'linkml_model', *_build_path(source, fmt)) if p)  # skip empty parts: no '//' in the url
+    return urljoin(GITHUB_IO_BASE, path)
 
 
 def GITHUB_PATH_FOR(source: Source,
@@ -122,7 +178,8 @@ def tag_to_commit(tag: str) -> str:
 
     # Return the absolute latest entry for branch
     if release is ReleaseTag.LATEST or (release is ReleaseTag.CURRENT and branch != "main"):
-        return f"{GITHUB_BASE}{branch}/{_build_path(source, fmt)}"
+        path = '/'.join(p for p in (branch, 'linkml_model', *_build_path(source, fmt)) if p)  # skip empty parts: no '//' in the url
+        return urljoin(GITHUB_BASE, path)
 
     # Return the latest published version
     elif release is ReleaseTag.CURRENT:
@@ -139,9 +196,10 @@ class ModelLoc:
         def __init__(self, model: Source, fmt: Format) -> str:
             self._model = model
             self._format = fmt
+            self._fmt_path = _Path.get(fmt.name)
 
         def __str__(self):
-            return f"{self._model.value}.{self._format.value}"
+            return f"{self._model.value}.{self._fmt_path.extension}"
 
         def __repr__(self):
             return str(self)
@@ -171,18 +229,10 @@ def __str__(self):
     def __repr__(self):
         return str(self)
 
-    @property
-    def yaml(self) -> ModelLoc:
-        return ModelFile.ModelLoc(self._model, Format.YAML)
-
     @property
     def graphql(self) -> ModelLoc:
         return ModelFile.ModelLoc(self._model, Format.GRAPHQL)
 
-    @property
-    def html(self) -> ModelLoc:
-        return ModelFile.ModelLoc(self._model, Format.HTML)
-
     @property
     def json(self) -> ModelLoc:
         return ModelFile.ModelLoc(self._model, Format.JSON)
diff --git a/tests/test_linkml_files.py b/tests/test_linkml_files.py
index 05b6cae3..5899950d 100644
--- a/tests/test_linkml_files.py
+++ b/tests/test_linkml_files.py
@@ -1,64 +1,148 @@
-import os
-import unittest
-import re
+import pytest
+import requests
+from pathlib import Path
+from itertools import product
+from urllib.parse import urlparse
 
-import linkml_model.linkml_files as fileloc
-from linkml_model.linkml_files import URL_FOR, Format, Source, LOCAL_PATH_FOR, GITHUB_IO_PATH_FOR, GITHUB_PATH_FOR, \
+try:
+    import requests_cache
+    HAVE_REQUESTS_CACHE = True
+except ImportError:
+    HAVE_REQUESTS_CACHE = False
+
+from linkml_model.linkml_files import (
+    Source,
+    Format,
+    _Path,
+    URL_FOR,
+    LOCAL_PATH_FOR,
+    LOCAL_BASE,
+    GITHUB_IO_PATH_FOR,
+    GITHUB_PATH_FOR,
+    META_ONLY,
     ReleaseTag
-from tests import abspath
-
-root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "linkml_model"))
-
-SKIP_GITHUB_API = False  # True means don't do the github API tests
-
-
-class LinkMLFilesTestCase(unittest.TestCase):
-    """ Test that linkml_model/linkml_files.py work """
-    def test_basic_rules(self):
-        self.assertEqual("https://w3id.org/linkml/annotations.yaml",
-                         URL_FOR(Source.ANNOTATIONS, Format.YAML))
-        self.assertEqual("https://w3id.org/linkml/meta.model.context.jsonld",
-                         URL_FOR(Source.META, Format.NATIVE_JSONLD))
-        self.assertEqual(os.path.join(root_path, "model/schema/meta.yaml"),
-                         LOCAL_PATH_FOR(Source.META, Format.YAML))
-        print(LOCAL_PATH_FOR(Source.META, Format.YAML))
-        self.assertEqual(os.path.join(root_path, "jsonld/types.model.context.jsonld"),
-                         LOCAL_PATH_FOR(Source.TYPES, Format.NATIVE_JSONLD))
-        self.assertEqual("https://linkml.github.io/linkml-model/model/schema/meta.yaml",
-                         GITHUB_IO_PATH_FOR(Source.META, Format.YAML))
-        self.assertEqual("https://linkml.github.io/linkml-model/jsonld/types.model.context.jsonld",
-                         GITHUB_IO_PATH_FOR(Source.TYPES, Format.NATIVE_JSONLD))
-        self.assertEqual("https://raw.githubusercontent.com/linkml/linkml-model/main/jsonld/meta.model.context.jsonld",
-                         GITHUB_PATH_FOR(Source.META, Format.NATIVE_JSONLD, ReleaseTag.LATEST))
-        self.assertEqual("https://raw.githubusercontent.com/linkml/linkml-model/testing_branch/owl/mappings.owl.ttl",
-                         GITHUB_PATH_FOR(Source.MAPPINGS, Format.OWL, branch="testing_branch"))
-
-    @unittest.skipIf(SKIP_GITHUB_API, "Github API tests skipped")
-    def test_github_specific_rules(self):
-        """
-        Test accesses that require github API to access
-        This is separate because we can only do so many tests per hour w/o getting a 403
-        """
-        self.assertEqual("https://raw.githubusercontent.com/linkml/linkml-model/f30637f5a585f3fc4b12fd3dbb3e7e95108d4b42/jsonld/meta.model.context.jsonld",
-                         GITHUB_PATH_FOR(Source.META, Format.NATIVE_JSONLD, "v0.0.1"))
-        current_loc = re.sub(r'linkml-model/[0-9a-f]*/', 'linkml-model/SHA/', GITHUB_PATH_FOR(Source.TYPES, Format.YAML))
-        self.assertEqual("https://raw.githubusercontent.com/linkml/linkml-model/SHA/model/schema/types.yaml", current_loc)
-        # TODO: We may want to raise an error here?
-        self.assertEqual('https://raw.githubusercontent.com/linkml/linkml-model/missing_branch/owl/mappings.owl.ttl',
-                         GITHUB_PATH_FOR(Source.MAPPINGS, Format.OWL, branch="missing_branch"))
-
-        with self.assertRaises(ValueError) as e:
-            GITHUB_PATH_FOR(Source.META, Format.RDF, "vv0.0.1")
-        self.assertEqual("Tag: vv0.0.1 not found!", str(e.exception))
-
-    def test_shorthand_paths(self):
-        self.assertEqual('meta', str(fileloc.meta))
-        self.assertEqual('meta.yaml', str(fileloc.meta.yaml))
-        self.assertEqual('meta.py', str(fileloc.meta.python))
-        self.assertEqual(abspath('linkml_model/model/schema/meta.yaml'), str(fileloc.meta.yaml.file))
-        self.assertEqual('https://linkml.github.io/linkml-model/model/schema/meta.yaml', str(fileloc.meta.yaml.github_loc()))
-        self.assertEqual('https://raw.githubusercontent.com/linkml/linkml-model/f30637f5a585f3fc4b12fd3dbb3e7e95108d4b42/model/schema/meta.yaml', str(fileloc.meta.yaml.github_loc('v0.0.1')))
-
-
-if __name__ == '__main__':
-    unittest.main()
+)
+
+EXPECTED_FORMATS = [
+    (source, fmt) for source, fmt in product(Source, Format)
+    if (fmt not in META_ONLY or source == Source.META)
+]
+
+W3ID_EXTENSIONS = (
+    'html',
+    'yaml',
+    'graphql',
+    'context.json',
+    'context.jsonld',
+    'schema.json',
+    'json',
+    'ttl',
+    'owl',
+    'shex',
+    'shexc',
+    'shexj'
+)
+W3ID_FORMATS = [
+    (source, fmt) for source, fmt in EXPECTED_FORMATS
+    if _Path.get(fmt.name).extension in W3ID_EXTENSIONS
+]
+"""The formats that have rewrite rules at https://github.com/perma-id/w3id.org/blob/master/linkml/.htaccess"""
+
+@pytest.mark.parametrize(
+    'source,fmt',
+    EXPECTED_FORMATS
+)
+def test_local_paths(source, fmt):
+    a_path = Path(LOCAL_PATH_FOR(source, fmt))
+    assert a_path.exists()
+    assert a_path.is_absolute()
+
+@pytest.mark.parametrize(
+    'fmt',
+    Format.__iter__()
+)
+def test_format_paths(fmt):
+    """Every format should have an entry in _Path"""
+    assert fmt.name in _Path.items()
+
+def test_no_unmapped_dirs():
+    """
+    There should be no additional directories that don't have a mapping for Format.
+    """
+    EXCLUDES = ('__pycache__',)
+
+    expected = {LOCAL_BASE / _Path.get(fmt.name).path for fmt in Format}
+    expected.add(LOCAL_BASE / 'model')
+
+    actual = {a_dir for a_dir in LOCAL_BASE.iterdir() if a_dir.is_dir() and a_dir.name not in EXCLUDES}
+    # Special case the root directory
+    actual.add(LOCAL_BASE)
+    # Special case YAML which is in a subdirectory - we've checked for existence above
+    actual.add(LOCAL_BASE / _Path.get('YAML').path)
+    assert expected == actual
+
+
+# --------------------------------------------------
+# URLs
+# --------------------------------------------------
+
+@pytest.mark.skip("github paths largely unused and expensive to test due to ratelimiting")
+@pytest.mark.parametrize(
+    'release_type',
+    ReleaseTag.__iter__()
+)
+@pytest.mark.parametrize(
+    'source,fmt',
+    EXPECTED_FORMATS
+)
+def test_github_path_exists(source,fmt, release_type):
+    url = GITHUB_PATH_FOR(source, fmt, release_type)
+    res = requests.get(url)
+    assert res.status_code != 404, url
+
+
+@pytest.mark.parametrize(
+    'release_type',
+    ReleaseTag.__iter__()
+)
+@pytest.mark.parametrize(
+    'source,fmt',
+    EXPECTED_FORMATS
+)
+def test_github_path_format(source,fmt, release_type):
+    if release_type == ReleaseTag.CURRENT:
+        pytest.skip("Need to cache network requests for this")
+
+    url = GITHUB_PATH_FOR(source, fmt, release_type)
+    # ensure it parses: a ParseResult is always truthy, so check scheme and netloc explicitly
+    assert all(urlparse(url)[:2]), url
+    # for windows...
+    assert '\\' not in url
+
+@pytest.mark.skip("github paths largely unused")
+@pytest.mark.parametrize(
+    'source,fmt',
+    EXPECTED_FORMATS
+)
+def test_github_io_path(source,fmt):
+    url = GITHUB_IO_PATH_FOR(source, fmt)
+    res = requests.get(url)
+    assert res.status_code != 404, url
+
+
+@pytest.mark.skipif(not HAVE_REQUESTS_CACHE,reason= 'Need to cache this')
+@pytest.mark.parametrize(
+    'source,fmt',
+    W3ID_FORMATS
+)
+def test_url_for_format(source,fmt):
+    url = URL_FOR(source, fmt)
+    res = requests.get(url)
+    assert res.status_code != 404, url
+
+def test_fixed_meta_url():
+    """
+    One fixed canary value - the METAMODEL_URI as used in linkml main shouldn't change
+    """
+    assert URL_FOR(Source.META, Format.YAML) == 'https://w3id.org/linkml/meta.yaml'
+    assert URL_FOR(Source.META, Format.JSONLD) == 'https://w3id.org/linkml/meta.context.jsonld'