keep legacy API in place

cldf · Oct 20, 2023 · 491d16c · 491d16c
1 parent a59b9e6
commit 491d16c
Show file tree

Hide file tree

Showing 15 changed files with 1,270 additions and 34 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,19 @@
 # Changes
 
-## [Unreleased]
+## [2.0.0]
+
+Zenodo's upgrade from Oct. 13, 2023 brought a couple of breaking changes for the Zenodo API.
+While it was possible to accommodate these changes in the implementation of `cldfzenodo` while
+keeping the **cldfzenodo** API as is, we took the opportunity to also add a streamlined API. This
+only affects the `cldfzenodo` Python API, though - the commandline interface
+`cldfbench zenodo.download` as well as the `DatasetResolver` functionality have not
+changed. The old Python API is still available, but using it will trigger deprecation
+warnings.
+
+Deprecated functionality will be removed in v2.2.
+
+
+### Other changes
 
 - Dropped py3.7 compatibility.
 

diff --git a/README.md b/README.md
@@ -6,7 +6,9 @@
 `cldfzenodo` provides programmatic access to CLDF data deposited on [Zenodo](https://zenodo.org).
 
 **NOTE:** The Zenodo upgrade from October 13, 2023 introduced quite a few changes in various parts
-of the system. Thus, `cldfzenodo` before version 2.0 cannot be used anymore.
+of the system. Thus, `cldfzenodo` before version 2.0 cannot be used anymore. `cldfzenodo` 2.x is
+meant to be backwards compatible, i.e. it provides the same Python API as `cldfzenodo` 1.x - but
+may issue deprecation warnings.
 
 
 ## Install

diff --git a/setup.cfg b/setup.cfg
@@ -35,7 +35,7 @@ package_dir =
     = src
 python_requires = >=3.8
 install_requires =
-    clldutils>=3.15.1
+    clldutils
     pycldf>=1.30.0
     nameparser
     attrs

diff --git a/src/cldfzenodo/__init__.py b/src/cldfzenodo/__init__.py
@@ -1,5 +1,37 @@
+import warnings
+
+from clldutils.misc import deprecated
+
 from cldfzenodo.record import *
 from cldfzenodo.api import *
+from cldfzenodo import oai
+from cldfzenodo import search
 
 __version__ = '1.1.1.dev0'
 # flake8: noqa
+
+# -------------------------------------------------------------------------------------------------
+# legacy API:
+# -------------------------------------------------------------------------------------------------
+def search_wordlists(q=None, **kw):
+    deprecated('Use `API.iter_records` instead.')
+    if kw:
+        warnings.warn('Zenodo search API changed, custom parameters passed as kw are ignored.')
+    return API.iter_records(keyword='cldf:Wordlist', allversions=True, _q=q)
+
+
+def search_structuredatasets(q=None, **kw):
+    deprecated('Use `API.iter_records` instead.')
+    if kw:
+        warnings.warn('Zenodo search API changed, custom parameters passed as kw are ignored.')
+    return API.iter_records(keyword='cldf:StructureDataset', allversions=True, _q=q)
+
+
+def oai_lexibank():
+    deprecated('Use `API.iter_records` instead.')
+    return API.iter_records(community='lexibank', allversions=True)
+
+
+def oai_cldf_datasets():
+    deprecated('Use `API.iter_records` instead.')
+    return API.iter_records(community='cldf-datasets', allversions=True)
diff --git a/src/cldfzenodo/api.py b/src/cldfzenodo/api.py
@@ -24,7 +24,10 @@ def q(**kw) -> str:
 
     See https://help.zenodo.org/guides/search/ for details.
     """
-    return ' '.join('{}:"{}"'.format(k, v) for k, v in kw.items())
+    res = kw.pop('_q', '') or ''
+    if kw:
+        res += '' + ' '.join('{}:"{}"'.format(k, v) for k, v in kw.items())
+    return res.strip()
 
 
 class Results:

diff --git a/src/cldfzenodo/oai.py b/src/cldfzenodo/oai.py
@@ -0,0 +1,8 @@
+from clldutils.misc import deprecated
+
+from cldfzenodo.api import API
+
+
+def iter_records(community):
+    deprecated('Use `API.iter_records` instead.')
+    return API.iter_records(community=community, allversions=True)
diff --git a/src/cldfzenodo/record.py b/src/cldfzenodo/record.py
@@ -13,12 +13,12 @@
 
 import attr
 import nameparser
-from clldutils import licenses
 from pycldf import iter_datasets, Source, Dataset
 
 __all__ = ['Record', 'GithubRepos', 'ZENODO_DOI_FORMAT', 'ZENODO_DOI_PATTERN', 'get_doi']
 
 ZENODO_DOI_PATTERN = re.compile(r"10\.5281/zenodo\.(?P<recid>[0-9]+)")
+DOI_PATTERN = re.compile(r"10\.[0-9.]+/[^/]+")
 ZENODO_DOI_FORMAT = '10.5281/zenodo.{}'
 NS = dict(
     rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
@@ -114,8 +114,8 @@ def get_doi(doi_or_url: str) -> str:
         doi = url.path[1:]
     else:
         raise ValueError('Unknown DOI format')
-    if not ZENODO_DOI_PATTERN.fullmatch(doi):
-        raise ValueError('Not a Zenodo DOI: "{}"'.format(doi))
+    if not (ZENODO_DOI_PATTERN.fullmatch(doi) or DOI_PATTERN.fullmatch(doi)):
+        raise ValueError('Not a DOI: "{}"'.format(doi))
     return doi
 
 
@@ -141,7 +141,7 @@ class Record:
     """
     doi = attr.ib(
         converter=get_doi,
-        validator=attr.validators.matches_re(r'10\.5281/zenodo\.[0-9]+'))
+        validator=attr.validators.matches_re(DOI_PATTERN))
     title = attr.ib()
     creators = attr.ib(converter=get_creators, default=attr.Factory(list))
     year = attr.ib(default=None)
@@ -185,7 +185,7 @@ def from_dict(cls, d):
             creators=[c['name'] for c in d['metadata']['creators']],
             year=d['metadata']['publication_date'].split('-')[0],
             version=d['metadata'].get('version'),
-            concept_doi=d['conceptdoi'],
+            concept_doi=d.get('conceptdoi'),  # There are old records with "concept_rec_id" ...
             # FIXME: Check Zenodo API periodically to see whether URLs are correct now.
             download_urls=[f['links']['self'].replace('/api/', '/') for f in d.get('files')],
         )
@@ -257,13 +257,30 @@ def bibtex(self) -> str:
             url='https://doi.org/{}'.format(self.doi),
         )
         if self.license:
-            lic = licenses.find(self.license)
-            src['copyright'] = lic.name if lic else self.license
+            src['copyright'] = self.license
         return src.bibtex()
 
-    def citation(self, api) -> str:
+    def get_citation(self, api) -> str:
+        """
+        Request this record's citation through `api.records`, using the `apa` style.
+
+        :param api: Object providing a `records(id_=..., params=..., headers=...)` method.
+        :return: The result of `api.records` with surrounding whitespace stripped.
+        """
         # curl -H "Accept:text/x-bibliography" "https://zenodo.org/api/records/7079637?style=apa"
         return api.records(
             id_=self.id,
             params=dict(style='apa'),
             headers=dict(Accept='text/x-bibliography')).strip()
+
+    # ---------------------------------------------------------------------------------------------
+    # legacy API:
+    # ---------------------------------------------------------------------------------------------
+    @staticmethod
+    def from_doi(doi):  # pragma: no cover
+        from cldfzenodo import API
+        return API.get_record(doi=doi)
+
+    @staticmethod
+    def from_concept_doi(doi, version_tag):  # pragma: no cover
+        from cldfzenodo import API
+        return API.get_record(conceptdoi=doi, version=version_tag)
+
+    @property
+    def citation(self):  # pragma: no cover
+        from cldfzenodo import API
+        return self.get_citation(API)
diff --git a/src/cldfzenodo/search.py b/src/cldfzenodo/search.py
@@ -0,0 +1,8 @@
+from clldutils.misc import deprecated
+
+from cldfzenodo.api import API
+
+
+def iter_records(keyword, q=None, **kw):
+    deprecated('Use `API.iter_records` instead.')
+    return API.iter_records(keyword=keyword, allversions=True, _q=q)
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -6,3 +6,22 @@
 @pytest.fixture
 def fixtures_dir():
     return pathlib.Path(__file__).parent / 'fixtures'
+
+
+@pytest.fixture
+def urlopen(fixtures_dir):
+    class Response:
+        def __init__(self, fname):
+            self.path = fixtures_dir / fname
+
+        def read(self):
+            return self.path.read_bytes()
+
+    def f(req):
+        if '/communities?' in req.full_url:
+            return Response('communities.json')
+        elif 'doi%3A' in req.full_url:
+            return Response('record.json')
+        return Response('search_keyword.json')
+
+    return f