Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added docstring and type annotations to family(), removed parameter endpoint #87

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
193 changes: 182 additions & 11 deletions epo_ops/api.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
# -*- coding: utf-8 -*-
import logging
import warnings
from base64 import b64encode
from typing import List, Optional, Union
from xml.etree import ElementTree as ET

import requests
from requests.exceptions import HTTPError

from . import exceptions
from .middlewares import Throttler
from .models import NETWORK_TIMEOUT, AccessToken, Request
from .models import (
NETWORK_TIMEOUT,
AccessToken,
Docdb,
Epodoc,
Original,
Request,
)

log = logging.getLogger(__name__)

Expand All @@ -35,23 +44,125 @@
self.secret = secret
self._access_token = None

def family(self, reference_type, input, endpoint=None, constituents=None):
def family(
self,
reference_type: str,
input: Union[Docdb, Epodoc],
endpoint=None,
constituents: Optional[List[str]] = None,
) -> requests.Response:
"""
Retrieves the patent numbers of the extended patent family related to the input (INPADOC family).

Args:
reference_type (str): Any of "publication", "application", or "priority".
input (Epodoc or Docdb): The document number. Cannot be Original.
endpoint (optional): Deprecated. Not applicable to the family service; passing any value other than None emits a DeprecationWarning.
constituents (List[str], optional): List of "biblio", "legal" or both.
Defaults to None.

Returns:
requests.Response: a requests.Response object.

Examples:
>>> response = client.family("publication", epo_ops.models.Epodoc("EP1000000"))
>>> response
<Response [200]>
>>> len(response.text)
8790

>>> response_with_constituents = client.family("publication", epo_ops.models.Epodoc("EP1000000"), None, ["biblio", "legal"])
>>> response_with_constituents
<Response [200]>
>>> len(response_with_constituents.text)
160206
"""
if endpoint is not None:
warnings.warn(

Check warning on line 81 in epo_ops/api.py

View check run for this annotation

Codecov / codecov/patch

epo_ops/api.py#L80-L81

Added lines #L80 - L81 were not covered by tests
"The `endpoint` argument is not used in this context and will be removed.",
DeprecationWarning,
stacklevel=2,
)

url = self._make_request_url(
dict(
service=self.__family_path__,
reference_type=reference_type,
input=input,
endpoint=endpoint,
endpoint=None,
Comment on lines -44 to +92
Copy link
Member

@amotl amotl Jan 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are 100% sure on this, let's do it. Otherwise, it would also be a breaking change, if, by chance, the parameter would still be used/needed in some cases.

Copy link
Contributor Author

@mattkeanny mattkeanny Jan 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested the family() method, and the implementation allows endpoint to be assigned one of the allowed values for the parameter constituents and still work as if constituents was assigned the value. Although not according to ops specs, the resulting url when using e.g. endpoint="biblio" is the same as if constituents=["biblio"] was used.
So removing it may indeed break code out there. I'd leave that for a major version upgrade. Fine with the deprecation warning.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Matt. Apologies for the delay.

So removing it may indeed break code out there. I'd leave that for a major version upgrade. Fine with the deprecation warning.

I am currently not totally sure, it has been a while... Do you agree that we want to keep endpoint=endpoint? It looks like the patch hasn't changed yet.

On the other hand, I don't want to be too pedantic about it if you think it will be good to go.

constituents=constituents,
use_get=True,
)
)
return self._make_request(url, None, params=input.as_api_input(), use_get=True)

def image(self, path, range=1, document_format="application/tiff"):
def image(
self, path: str, range: int = 1, document_format: str = "application/tiff"
) -> requests.Response:
"""
Retrieve the image page for a given path, one page at a time.
The path needs to be retrieved from the xml resulting from a prior inquiry using
the published_data() service with the 'endpoint="images"' argument.

Args:
path (str): contained in the 'link' attribute of the document instance element (inquiry xml).
range (int, optional): the number of the image page to be fetched. Defaults to 1.
document_format (str, optional): depends on the inquiry response. Defaults to "application/tiff".

Returns:
requests.Response: a requests.Response object.
"""
return self._image_request(path, range, document_format)

def number(self, reference_type, input, output_format):
def number(
self,
reference_type: str,
input: Union[Original, Docdb, Epodoc],
output_format: str,
) -> requests.Response:
"""
This service converts a patent number from one input format into another format.

Args:
reference_type (str): Any of "publication", "application", or "priority".
input (Original, Epodoc or Docdb): The document number as a data object.
output_format (str): Any of "original", "epodoc" or "docdb".

Returns:
requests.Response: a requests.Response object.


Examples:
# from JP original to docdb
>>> response = client.number(
"application",
Original(number="2006-147056", country_code="JP", kind_code="A", date="20060526"),
"docdb",
)

# from US original to epodoc
>>> response = client.number(
"application",
Original("08/921,321", "US", "A", "19970829"),
"epodoc",
)

# from PCT original to docdb
>>> response = client.number(
"application",
Original("PCT/GB02/04635", date="19970829"),
"docdb",
)

Use-cases:
Given that other OPS services use only the Epodoc or Docdb format,
the general use-case of this method is to convert the Original format
into either the Docdb or the Epodoc format.

Note:
It is especially important to include the date of publication in the input
whenever possible because number formatting may vary depending on the date.
"""
possible_conversions = {
"docdb": ["original", "epodoc"],
"epodoc": ["original"],
Expand All @@ -73,8 +184,32 @@
)

def published_data(
self, reference_type, input, endpoint="biblio", constituents=None
):
self,
reference_type: str,
input: Union[Docdb, Epodoc],
endpoint="biblio",
constituents: Optional[List[str]] = None,
) -> requests.Response:
"""
Retrieval service for published data.

Args:
reference_type (str): Any of "publication", "application", or "priority".
input (Epodoc or Docdb): The document number as an Epodoc or Docdb data object.
endpoint (str, optional): "biblio", "equivalents", "abstract", "claims", "description",
"fulltext", "images". Defaults to "biblio".
constituents (list[str], optional): List of "biblio", "abstract", "images", "full-cycle".

Returns:
requests.Response: a requests.Response object.

Note:
1) input cannot be a models.Original
2) only the "biblio" and "equivalents" endpoints use the constituents parameter.
3) the images and fulltext retrieval require a two-step process: inquiry, then retrieval, e.g.
- client.published_data(..., endpoint='images',...) to retrieve the image path, then
- client.image(path=...)
"""
return self._service_request(
dict(
service=self.__published_data_path__,
Expand All @@ -86,8 +221,16 @@
)

def published_data_search(
self, cql, range_begin=1, range_end=25, constituents=None
):
self,
cql: str,
range_begin: int = 1,
range_end: int = 25,
constituents: Optional[List[str]] = None,
) -> requests.Response:
"""
Performs a bibliographic search using Common Query Language (CQL) to retrieve the data.
Possible constituents: "abstract", "biblio" and/or "full-cycle".
"""
range = dict(key="X-OPS-Range", begin=range_begin, end=range_end)
return self._search_request(
dict(
Expand All @@ -97,7 +240,24 @@
range,
)

def register(self, reference_type, input, constituents=None):
def register(
self,
reference_type: str,
input: Epodoc,
constituents: Optional[List[str]] = None,
) -> requests.Response:
"""
Provides the interface for the European Patent Register online service for retrieving all
the publicly available information on published European patent applications and
international PCT applications designating the EPO as they pass through the grant procedure.

Possible constituents: "biblio", "events", "procedural-steps" or "upp".

Notes:
1) Only the Epodoc input format is supported
2) the default behaviour of the register retrieval is biblio, so you don't have to add the
biblio constituent if you want to retrieve only bibliographic data.
"""
# TODO: input can only be Epodoc, not Docdb
constituents = constituents or ["biblio"]
return self._service_request(
Expand All @@ -109,7 +269,18 @@
)
)

def register_search(self, cql, range_begin=1, range_end=25):
def register_search(
self, cql: str, range_begin: int = 1, range_end: int = 25
) -> requests.Response:
"""
Use this service to find specific register data
that is part of the public aspect of the patent lifecycle.

Example:
>>> response = client.register_search(cql="pa=IBM", range_begin=1, range_end=25)
>>> print(response.text)

"""
range = dict(key="Range", begin=range_begin, end=range_end)
return self._search_request(
{"service": self.__register_search_path__}, cql, range
Expand Down
Loading