Skip to content

Commit 3fcecba

Browse files
COPDS-2654: schema.org & isPartOf & distribution (#112)
* chore: schema.org
* chore: distribution metadata
1 parent 17f221a commit 3fcecba

File tree

3 files changed

+40
-8
lines changed

3 files changed

+40
-8
lines changed

cads_catalogue_api_service/models/schema_org.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class Organization(pydantic.BaseModel):
3535
class DataDownload(pydantic.BaseModel):
3636
type: str = pydantic.Field("DataDownload", const=True, alias="@type")
3737
encodingFormat: str
38-
contentUrl: str
38+
url: str | None = None
3939

4040

4141
class GeoShape(pydantic.BaseModel):

cads_catalogue_api_service/schema_org.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,18 @@
3434
)
3535

3636

37+
# List subject to change in case of new portals
38+
CADS_SITE_TO_LONG_NAME = {
39+
"cds": "Climate Data Store",
40+
"ads": "Atmosphere Data Store",
41+
"cems": "CEMS Early Warning Data Store",
42+
"ecds": "ECMWF Data Store",
43+
"xds": "ECMWF Cross Data Store",
44+
"eds": "Energy Thematic Hub",
45+
"hds": "Health Thematic Hub",
46+
}
47+
48+
3749
def query_collection(
3850
session: sa.orm.Session,
3951
collection_id: str,
@@ -80,12 +92,14 @@ def schema_org_json_ld(
8092
url = get_url_link(collection, "self")
8193
license = get_url_link(collection, "license")
8294
distribution = get_url_link(collection, "layout")
95+
retrieve_url = get_url_link(collection, "retrieve")
8396
temporal_coverage = (
8497
collection.get("extent", {}).get("temporal", {}).get("interval", [])
8598
)
8699
temporal_coverage = (
87100
list(filter(None, temporal_coverage[0])) if temporal_coverage else []
88101
)
102+
download_url = f"{os.getenv(f'{site.upper()}_PROJECT_URL', None)}/datasets/{collection_id}?tab=download"
89103

90104
box = collection.get("extent", {}).get("spatial", {}).get("bbox", [])
91105

@@ -117,13 +131,21 @@ def schema_org_json_ld(
117131
(
118132
models.schema_org.DataDownload(
119133
encodingFormat=collection.get("file_format")
120-
# Sometimes the file_format is not defined on the input data
121134
or "application/octet-stream",
122-
contentUrl=f"{url}?tab=download",
135+
url=f"{retrieve_url}",
123136
)
124137
if distribution
125138
else ""
126-
)
139+
),
140+
(
141+
models.schema_org.DataDownload(
142+
encodingFormat=collection.get("file_format")
143+
or "application/octet-stream",
144+
url=download_url,
145+
)
146+
if distribution
147+
else ""
148+
),
127149
],
128150
temporalCoverage="/".join(temporal_coverage) if temporal_coverage else None,
129151
spatialCoverage=models.schema_org.Place(
@@ -141,7 +163,7 @@ def schema_org_json_ld(
141163
{
142164
"@type": "DataCatalog",
143165
"identifier": site,
144-
"name": "ECMWF Data Store",
166+
"name": CADS_SITE_TO_LONG_NAME.get(site, "ECMWF Data Store"),
145167
"url": f"{os.getenv(f'{site.upper()}_PROJECT_URL', None)}/datasets"
146168
if site
147169
else None,

tests/test_40_schema_org.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ def static_collection_query(
8181
"href": "http://localhost:8080/api/catalogue/v1/collections/era5-something/layout",
8282
"title": "Distribution",
8383
},
84+
{
85+
"rel": "retrieve",
86+
"href": "http://localhost:8080/api/retrieve/v1/processes/era5-something",
87+
"title": "Retrieve",
88+
},
8489
],
8590
"assets": {
8691
"thumbnail": {
@@ -139,8 +144,13 @@ def test_schema_org_jsonId(monkeypatch) -> None:
139144
{
140145
"@type": "DataDownload",
141146
"encodingFormat": "application/octet-stream",
142-
"contentUrl": "http://localhost:8080/api/catalogue/v1/collections/era5-something?tab=download",
143-
}
147+
"url": "http://localhost:8080/api/retrieve/v1/processes/era5-something",
148+
},
149+
{
150+
"@type": "DataDownload",
151+
"encodingFormat": "application/octet-stream",
152+
"url": "https://cds.climate.copernicus.eu/datasets/era5-something?tab=download",
153+
},
144154
],
145155
"temporalCoverage": "2019-11-05T00:00:00Z/2023-06-22T00:00:00Z",
146156
"spatialCoverage": {
@@ -155,7 +165,7 @@ def test_schema_org_jsonId(monkeypatch) -> None:
155165
{
156166
"@type": "DataCatalog",
157167
"identifier": "cds",
158-
"name": "ECMWF Data Store",
168+
"name": "Climate Data Store",
159169
"url": "https://cds.climate.copernicus.eu/datasets",
160170
}
161171
],

0 commit comments

Comments
 (0)