Skip to content

Commit 608c333

Browse files
authored
Increase robustness requests calls and boto3 (#67)
1 parent 7ef9930 commit 608c333

File tree

10 files changed

+122
-146
lines changed

10 files changed

+122
-146
lines changed

copernicusmarine/core_functions/credentials_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
COPERNICUSMARINE_SERVICE_USERNAME,
1818
)
1919
from copernicusmarine.core_functions.sessions import (
20-
get_configured_request_session,
20+
get_configured_requests_session,
2121
)
2222
from copernicusmarine.core_functions.utils import (
2323
CACHE_BASE_DIRECTORY,
@@ -232,7 +232,7 @@ def _check_credentials_with_cas(username: str, password: str) -> bool:
232232
cmems_cas_login_url = (
233233
f"https://cmems-cas.cls.fr/cas/login?service={service}"
234234
)
235-
conn_session = get_configured_request_session()
235+
conn_session = get_configured_requests_session()
236236
login_session = conn_session.get(
237237
cmems_cas_login_url, proxies=conn_session.proxies
238238
)

copernicusmarine/core_functions/custom_zarr_store.py renamed to copernicusmarine/core_functions/custom_open_zarr.py

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,15 @@
66
import botocore.config
77
import botocore.exceptions
88
import botocore.session
9+
import xarray
910

10-
from copernicusmarine.core_functions.utils import create_custom_query_function
11+
from copernicusmarine.core_functions.sessions import (
12+
PROXIES,
13+
TRUST_ENV,
14+
_get_ssl_context,
15+
get_configured_boto3_session,
16+
)
17+
from copernicusmarine.core_functions.utils import parse_access_dataset_url
1118

1219
logger = logging.getLogger("copernicus_marine_root_logger")
1320

@@ -18,41 +25,16 @@ def __init__(
1825
endpoint: str,
1926
bucket: str,
2027
root_path: str,
21-
secret_key: Optional[str] = None,
22-
access_key: Optional[str] = None,
2328
copernicus_marine_username: Optional[str] = None,
2429
number_of_retries: int = 9,
2530
initial_retry_wait_seconds: int = 1,
2631
):
2732
self._root_path = root_path.lstrip("/")
2833
self._bucket = bucket
29-
session = botocore.session.get_session()
30-
if secret_key is None and access_key is None:
31-
self.client = session.create_client(
32-
"s3",
33-
endpoint_url=endpoint,
34-
config=botocore.config.Config(
35-
signature_version=botocore.UNSIGNED
36-
),
37-
)
38-
else:
39-
self.client = session.create_client(
40-
"s3",
41-
endpoint_url=endpoint,
42-
aws_secret_access_key=secret_key,
43-
aws_access_key_id=access_key,
44-
)
45-
self.client.meta.events.register(
46-
"before-call.s3.ListObjects",
47-
create_custom_query_function(copernicus_marine_username),
48-
)
49-
self.client.meta.events.register(
50-
"before-call.s3.HeadObject",
51-
create_custom_query_function(copernicus_marine_username),
52-
)
53-
self.client.meta.events.register(
54-
"before-call.s3.GetObject",
55-
create_custom_query_function(copernicus_marine_username),
34+
self.client, _ = get_configured_boto3_session(
35+
endpoint,
36+
["GetObject", "HeadObject", "ListObjects"],
37+
copernicus_marine_username,
5638
)
5739

5840
self.number_of_retries = number_of_retries
@@ -156,3 +138,30 @@ def with_retries(self, fn):
156138
logger.debug(f"Retrying in {retry_delay} s...")
157139
time.sleep(retry_delay)
158140
retry_delay *= 2
141+
142+
143+
def open_zarr(
144+
dataset_url: str,
145+
copernicus_marine_username: Optional[str] = None,
146+
**kwargs,
147+
) -> xarray.Dataset:
148+
(
149+
endpoint,
150+
bucket,
151+
root_path,
152+
) = parse_access_dataset_url(dataset_url)
153+
store = CustomS3Store(
154+
endpoint=endpoint,
155+
bucket=bucket,
156+
root_path=root_path,
157+
copernicus_marine_username=copernicus_marine_username,
158+
)
159+
kwargs.update(
160+
{
161+
"storage_options": {
162+
"client_kwargs": {"trust_env": TRUST_ENV, "proxies": PROXIES},
163+
"ssl": _get_ssl_context(),
164+
}
165+
}
166+
)
167+
return xarray.open_zarr(store, **kwargs)

copernicusmarine/core_functions/services_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from copernicusmarine.catalogue_parser.request_structure import (
1717
DatasetTimeAndGeographicalSubset,
1818
)
19-
from copernicusmarine.core_functions import sessions
19+
from copernicusmarine.core_functions import custom_open_zarr
2020
from copernicusmarine.core_functions.utils import (
2121
FormatNotSupported,
2222
datetime_parser,
@@ -159,7 +159,7 @@ def _get_best_arco_service_type(
159159
CopernicusMarineDatasetServiceType.TIMESERIES,
160160
CopernicusMarineDatasetServiceType.GEOSERIES,
161161
]:
162-
dataset = sessions.open_zarr(
162+
dataset = custom_open_zarr.open_zarr(
163163
dataset_url, copernicus_marine_username=username
164164
)
165165

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
11
import ssl
2-
from typing import Optional
2+
from typing import Any, List, Literal, Optional, Tuple
33

44
import aiohttp
5+
import boto3
6+
import botocore
7+
import botocore.config
58
import certifi
69
import nest_asyncio
710
import requests
8-
import xarray
11+
from requests.adapters import HTTPAdapter, Retry
912

10-
from copernicusmarine.core_functions.custom_zarr_store import CustomS3Store
1113
from copernicusmarine.core_functions.environment_variables import (
1214
COPERNICUSMARINE_DISABLE_SSL_CONTEXT,
1315
COPERNICUSMARINE_TRUST_ENV,
1416
PROXY_HTTP,
1517
PROXY_HTTPS,
1618
)
17-
from copernicusmarine.core_functions.utils import parse_access_dataset_url
19+
from copernicusmarine.core_functions.utils import create_custom_query_function
1820

1921
TRUST_ENV = COPERNICUSMARINE_TRUST_ENV == "True"
2022
PROXIES = {}
@@ -40,36 +42,53 @@ def get_https_proxy() -> Optional[str]:
4042
return PROXIES.get("https")
4143

4244

43-
def get_configured_request_session() -> requests.Session:
45+
def get_configured_boto3_session(
46+
endpoint_url: str,
47+
operation_type: List[Literal["ListObjects", "HeadObject", "GetObject"]],
48+
username: Optional[str] = None,
49+
return_ressources: bool = False,
50+
) -> Tuple[Any, Any]:
51+
config_boto3 = botocore.config.Config(
52+
s3={"addressing_style": "virtual"},
53+
signature_version=botocore.UNSIGNED,
54+
retries={"max_attempts": 10, "mode": "standard"},
55+
)
56+
s3_session = boto3.Session()
57+
s3_client = s3_session.client(
58+
"s3",
59+
config=config_boto3,
60+
endpoint_url=endpoint_url,
61+
)
62+
for operation in operation_type:
63+
# Register the botocore event handler for adding custom query params
64+
# to S3 HEAD and GET requests
65+
s3_client.meta.events.register(
66+
f"before-call.s3.{operation}",
67+
create_custom_query_function(username),
68+
)
69+
if not return_ressources:
70+
return s3_client, None
71+
s3_resource = boto3.resource(
72+
"s3",
73+
config=config_boto3,
74+
endpoint_url=endpoint_url,
75+
)
76+
return s3_client, s3_resource
77+
78+
79+
def get_configured_requests_session() -> requests.Session:
4480
session = requests.Session()
4581
session.trust_env = TRUST_ENV
4682
session.verify = certifi.where()
4783
session.proxies = PROXIES
48-
return session
49-
50-
51-
def open_zarr(
52-
dataset_url: str,
53-
copernicus_marine_username: Optional[str] = None,
54-
**kwargs,
55-
) -> xarray.Dataset:
56-
(
57-
endpoint,
58-
bucket,
59-
root_path,
60-
) = parse_access_dataset_url(dataset_url)
61-
store = CustomS3Store(
62-
endpoint=endpoint,
63-
bucket=bucket,
64-
root_path=root_path,
65-
copernicus_marine_username=copernicus_marine_username,
84+
session.mount(
85+
"https://",
86+
HTTPAdapter(
87+
max_retries=Retry(
88+
total=5,
89+
backoff_factor=1,
90+
status_forcelist=[500, 502, 503, 504],
91+
)
92+
),
6693
)
67-
kwargs.update(
68-
{
69-
"storage_options": {
70-
"client_kwargs": {"trust_env": TRUST_ENV, "proxies": PROXIES},
71-
"ssl": _get_ssl_context(),
72-
}
73-
}
74-
)
75-
return xarray.open_zarr(store, **kwargs)
94+
return session

copernicusmarine/core_functions/versions_verifier.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from copernicusmarine import __version__ as client_version
66
from copernicusmarine.core_functions.sessions import (
7-
get_configured_request_session,
7+
get_configured_requests_session,
88
)
99
from copernicusmarine.core_functions.utils import (
1010
construct_query_params_for_marine_data_store_monitoring,
@@ -65,7 +65,7 @@ def _get_client_required_versions(
6565
else "https://s3.waw3-1.cloudferro.com/mdl-metadata/mdsVersions.json"
6666
)
6767
logger.debug(f"Getting required versions from {url_mds_versions}")
68-
session = get_configured_request_session()
68+
session = get_configured_requests_session()
6969
mds_versions: dict[str, str] = session.get(
7070
url_mds_versions,
7171
params=construct_query_params_for_marine_data_store_monitoring(),

copernicusmarine/download_functions/download_arco_series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import xarray
88

99
from copernicusmarine.catalogue_parser.request_structure import SubsetRequest
10-
from copernicusmarine.core_functions import sessions
10+
from copernicusmarine.core_functions import custom_open_zarr
1111
from copernicusmarine.core_functions.utils import (
1212
FORCE_DOWNLOAD_CLI_PROMPT_MESSAGE,
1313
add_copernicusmarine_version_in_dataset_attributes,
@@ -208,7 +208,7 @@ def open_dataset_from_arco_series(
208208
depth_parameters: DepthParameters,
209209
chunks=Optional[Literal["auto"]],
210210
) -> xarray.Dataset:
211-
dataset = sessions.open_zarr(
211+
dataset = custom_open_zarr.open_zarr(
212212
dataset_url,
213213
chunks=chunks,
214214
copernicus_marine_username=username,

copernicusmarine/download_functions/download_original_files.py

Lines changed: 12 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
from pathlib import Path
99
from typing import List, Optional, Tuple
1010

11-
import boto3
12-
import botocore
13-
import botocore.config
1411
import click
1512
from botocore.client import ClientError
1613
from numpy import append, arange
@@ -20,9 +17,11 @@
2017
GetRequest,
2118
overload_regex_with_additionnal_filter,
2219
)
20+
from copernicusmarine.core_functions.sessions import (
21+
get_configured_boto3_session,
22+
)
2323
from copernicusmarine.core_functions.utils import (
2424
FORCE_DOWNLOAD_CLI_PROMPT_MESSAGE,
25-
create_custom_query_function,
2625
flatten,
2726
get_unique_filename,
2827
parse_access_dataset_url,
@@ -441,21 +440,8 @@ def _list_files_on_marine_data_lake_s3(
441440
recursive: bool,
442441
) -> list[tuple[str, int, datetime.datetime, str]]:
443442

444-
s3_session = boto3.Session()
445-
s3_client = s3_session.client(
446-
"s3",
447-
config=botocore.config.Config(
448-
# Configures to use subdomain/virtual calling format.
449-
s3={"addressing_style": "virtual"},
450-
signature_version=botocore.UNSIGNED,
451-
),
452-
endpoint_url=endpoint_url,
453-
)
454-
455-
# Register the botocore event handler for adding custom query params
456-
# to S3 LIST requests
457-
s3_client.meta.events.register(
458-
"before-call.s3.ListObjects", create_custom_query_function(username)
443+
s3_client, _ = get_configured_boto3_session(
444+
endpoint_url, ["ListObjects"], username
459445
)
460446

461447
paginator = s3_client.get_paginator("list_objects")
@@ -485,18 +471,8 @@ def _list_files_on_marine_data_lake_s3(
485471
def _get_file_size_and_last_modified(
486472
endpoint_url: str, bucket: str, file_in: str, username: str
487473
) -> Optional[Tuple[int, datetime.datetime]]:
488-
s3_session = boto3.Session()
489-
s3_client = s3_session.client(
490-
"s3",
491-
config=botocore.config.Config(
492-
s3={"addressing_style": "virtual"},
493-
signature_version=botocore.UNSIGNED,
494-
),
495-
endpoint_url=endpoint_url,
496-
)
497-
498-
s3_client.meta.events.register(
499-
"before-call.s3.HeadObject", create_custom_query_function(username)
474+
s3_client, _ = get_configured_boto3_session(
475+
endpoint_url, ["HeadObject"], username
500476
)
501477

502478
try:
@@ -534,36 +510,12 @@ def _original_files_file_download(
534510
"""
535511
Download ONE file and return a string of the result
536512
"""
537-
s3_session = boto3.Session()
538-
s3_client = s3_session.client(
539-
"s3",
540-
config=botocore.config.Config(
541-
# Configures to use subdomain/virtual calling format.
542-
s3={"addressing_style": "virtual"},
543-
signature_version=botocore.UNSIGNED,
544-
),
545-
endpoint_url=endpoint_url,
546-
)
547-
s3_resource = boto3.resource(
548-
"s3",
549-
config=botocore.config.Config(
550-
# Configures to use subdomain/virtual calling format.
551-
s3={"addressing_style": "virtual"},
552-
signature_version=botocore.UNSIGNED,
553-
),
554-
endpoint_url=endpoint_url,
555-
)
556-
557-
# Register the botocore event handler for adding custom query params
558-
# to S3 HEAD and GET requests
559-
s3_client.meta.events.register(
560-
"before-call.s3.HeadObject",
561-
create_custom_query_function(username),
562-
)
563-
s3_client.meta.events.register(
564-
"before-call.s3.GetObject", create_custom_query_function(username)
513+
s3_client, s3_resource = get_configured_boto3_session(
514+
endpoint_url,
515+
["GetObject", "HeadObject"],
516+
username,
517+
return_ressources=True,
565518
)
566-
567519
last_modified_date_epoch = s3_resource.Object(
568520
bucket, file_in.replace(f"s3://{bucket}/", "")
569521
).last_modified.timestamp()

0 commit comments

Comments
 (0)