refactor: Use boto3 clients rather than resources TDE-1034 (#1172)
### Motivation

The latter (the resources API) is deprecated:

<https://boto3.amazonaws.com/v1/documentation/api/1.35.60/guide/resources.html>.

### Modifications

- Use the lower-level clients API instead of resources.
- Type annotate S3 clients to make IDE use easier; a sketch of the pattern follows below.
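
A minimal sketch of both changes together, assuming the `mypy-boto3-s3` stubs package (from `boto3-stubs`) is available to the type checker; the bucket and key literals are placeholders:

```python
from typing import TYPE_CHECKING

from boto3 import client

if TYPE_CHECKING:
    # Only type checkers see this import, so the stubs are not a runtime dependency.
    from mypy_boto3_s3 import S3Client
else:
    S3Client = dict

s3_client: S3Client = client("s3")

# Clients take explicit Bucket/Key arguments instead of going through an
# intermediate resource object such as s3.Object(bucket, key).
response = s3_client.get_object(Bucket="placeholder-bucket", Key="placeholder/key.json")
content = response["Body"].read()
```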

### Verification

Standard tests.
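
The updated tests below exercise these code paths under moto's `mock_aws`. Stripped to its core, the pattern looks roughly like this (the test name, region literal, and import path are illustrative; the suite itself uses a `DEFAULT_REGION_NAME` constant):

```python
from boto3 import client
from moto import mock_aws

from scripts.files.fs_s3 import read, write


@mock_aws  # moto intercepts every AWS call made inside the test
def test_write_read_round_trip() -> None:
    s3_client = client("s3", region_name="us-east-1")
    s3_client.create_bucket(Bucket="testbucket")

    write("s3://testbucket/test.file", b"test content")

    assert read("s3://testbucket/test.file") == b"test content"
```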
l0b0 authored Nov 24, 2024
1 parent bc93aaf commit ffe7f90
Showing 10 changed files with 120 additions and 119 deletions.
13 changes: 9 additions & 4 deletions scripts/aws/aws_helper.py
@@ -1,7 +1,7 @@
import json
from os import environ
from time import sleep
-from typing import Any, NamedTuple
+from typing import TYPE_CHECKING, Any, NamedTuple
from urllib.parse import urlparse

from boto3 import Session
@@ -11,6 +11,11 @@

from scripts.aws.aws_credential_source import CredentialSource

+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3Client
+else:
+    S3Client = dict
+
S3Path = NamedTuple("S3Path", [("bucket", str), ("key", str)])

aws_profile = environ.get("AWS_PROFILE")
@@ -26,9 +31,9 @@

def _init_roles() -> None:
"""Load bucket to roleArn mapping for LINZ internal buckets from SSM"""
-    s3 = session.resource("s3")
-    config_path = parse_path(bucket_config_path)
-    content_object = s3.Object(config_path.bucket, config_path.key)
-    file_content = content_object.get()["Body"].read().decode("utf-8")
+    s3_client: S3Client = session.client("s3")
+    bucket, key = parse_path(bucket_config_path)
+    content_object = s3_client.get_object(Bucket=bucket, Key=key)
+    file_content = content_object["Body"].read().decode("utf-8")
json_content = json.loads(file_content)

6 changes: 3 additions & 3 deletions scripts/aws/tests/aws_helper_test.py
@@ -6,13 +6,13 @@

def test_parse_path_s3(subtests: SubTests) -> None:
s3_path = "s3://bucket-name/path/to/the/file.test"
-    path = parse_path(s3_path)
+    bucket, key = parse_path(s3_path)

with subtests.test():
-        assert path.bucket == "bucket-name"
+        assert bucket == "bucket-name"

with subtests.test():
-        assert path.key == "path/to/the/file.test"
+        assert key == "path/to/the/file.test"


def test_parse_path_local() -> None:
9 changes: 7 additions & 2 deletions scripts/collection_from_items.py
@@ -2,7 +2,7 @@
import json
import os
from argparse import Namespace
-from typing import List
+from typing import TYPE_CHECKING, List

import shapely.geometry
import shapely.ops
@@ -17,6 +17,11 @@
from scripts.stac.imagery.create_stac import create_collection
from scripts.stac.imagery.metadata_constants import DATA_CATEGORIES, HUMAN_READABLE_REGIONS, CollectionMetadata

+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3Client
+else:
+    S3Client = dict
+

class NoItemsError(Exception):
pass
@@ -114,7 +119,7 @@ def main(args: List[str] | None = None) -> None:
msg = f"uri is not a s3 path: {uri}"
raise argparse.ArgumentTypeError(msg)

-    s3_client = client("s3")
+    s3_client: S3Client = client("s3")

files_to_read = list_files_in_uri(uri, [SUFFIX_JSON, SUFFIX_FOOTPRINT], s3_client)

4 changes: 2 additions & 2 deletions scripts/files/fs.py
@@ -4,7 +4,7 @@
from pathlib import Path
from typing import TYPE_CHECKING

-from boto3 import resource
+from boto3 import client
from linz_logger import get_log

from scripts.aws.aws_helper import is_s3
@@ -47,7 +47,7 @@ def read(path: str) -> bytes:
try:
return fs_s3.read(path)
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html#parsing-error-responses-and-catching-exceptions-from-aws-services
-    except resource("s3").meta.client.exceptions.ClientError as ce:
+    except client("s3").exceptions.ClientError as ce:
# Error Code can be found here:
# https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList
if ce.response["Error"]["Code"] == "NoSuchKey":
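
A side note on the `except` clause above: a boto3 client exposes the same `ClientError` class that botocore raises, so the resource-era `.meta.client` indirection is no longer needed. A quick sketch of the relationship (client construction makes no request, so this runs without credentials; the region literal is assumed for the sketch):

```python
from boto3 import client
from botocore.exceptions import ClientError

s3_client = client("s3", region_name="us-east-1")

# The client's error namespace re-exports botocore's ClientError,
# so either spelling works in an except clause.
assert s3_client.exceptions.ClientError is ClientError
```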
67 changes: 31 additions & 36 deletions scripts/files/fs_s3.py
@@ -4,8 +4,7 @@
from datetime import datetime
from typing import TYPE_CHECKING, Any

-from boto3 import client, resource
-from botocore.exceptions import ClientError
+from boto3 import client
from linz_logger import get_log

from scripts.aws.aws_helper import get_session, parse_path
@@ -31,19 +30,19 @@ def write(destination: str, source: bytes, content_type: str | None = None) -> N
if source is None:
get_log().error("write_s3_source_none", path=destination, error="The 'source' is None.")
raise Exception("The 'source' is None.")
-    s3_path = parse_path(destination)
-    key = s3_path.key
-    s3 = resource("s3")
+    bucket, key = parse_path(destination)
+    s3_client: S3Client = client("s3")
multihash = checksum.multihash_as_hex(source)

try:
-        s3_object = s3.Object(s3_path.bucket, key)
if content_type:
-            s3_object.put(Body=source, ContentType=content_type, Metadata={"multihash": multihash})
+            s3_client.put_object(
+                Bucket=bucket, Key=key, Body=source, ContentType=content_type, Metadata={"multihash": multihash}
+            )
else:
-            s3_object.put(Body=source, Metadata={"multihash": multihash})
+            s3_client.put_object(Bucket=bucket, Key=key, Body=source, Metadata={"multihash": multihash})
get_log().debug("write_s3_success", path=destination, duration=time_in_ms() - start_time)
-    except ClientError as ce:
+    except s3_client.exceptions.ClientError as ce:
get_log().error("write_s3_error", path=destination, error=f"Unable to write the file: {ce}")
raise ce

@@ -56,34 +55,33 @@ def read(path: str, needs_credentials: bool = False) -> bytes:
needs_credentials: Tells if credentials are needed. Defaults to False.
Raises:
-        ce: botocore ClientError
+        ClientError
Returns:
The file in bytes.
"""
start_time = time_in_ms()
-    s3_path = parse_path(path)
-    key = s3_path.key
-    s3 = resource("s3")
+    bucket, key = parse_path(path)
+    s3_client: S3Client = client("s3")

try:
if needs_credentials:
-            s3 = get_session(path).resource("s3")
+            s3_client = get_session(path).client("s3")

-        s3_object = s3.Object(s3_path.bucket, key)
-        file: bytes = s3_object.get()["Body"].read()
-    except s3.meta.client.exceptions.NoSuchBucket as nsb:
+        s3_object: GetObjectOutputTypeDef = s3_client.get_object(Bucket=bucket, Key=key)
+        file: bytes = s3_object["Body"].read()
+    except s3_client.exceptions.NoSuchBucket as nsb:
get_log().error("s3_bucket_not_found", path=path, error=f"The specified bucket does not seem to exist: {nsb}")
-        raise nsb
-    except s3.meta.client.exceptions.NoSuchKey as nsk:
+        raise
+    except s3_client.exceptions.NoSuchKey as nsk:
get_log().error("s3_key_not_found", path=path, error=f"The specified file does not seem to exist: {nsk}")
-        raise nsk
-    except s3.meta.client.exceptions.ClientError as ce:
+        raise
+    except s3_client.exceptions.ClientError as ce:
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html#parsing-error-responses-and-catching-exceptions-from-aws-services
if not needs_credentials and ce.response["Error"]["Code"] == "AccessDenied":
get_log().debug("read_s3_needs_credentials", path=path)
return read(path, True)
-        raise ce
+        raise

get_log().debug("read_s3_success", path=path, duration=time_in_ms() - start_time)
return file
@@ -97,36 +95,33 @@ def exists(path: str, needs_credentials: bool = False) -> bool:
needs_credentials: if acces to object needs credentials. Defaults to False.
Raises:
-        ce: ClientError
-        nsb: NoSuchBucket
+        s3_client.exceptions.ClientError
+        NoSuchBucket
Returns:
True if the S3 Object exists
"""
-    s3_path, key = parse_path(path)
-    s3 = resource("s3")
+    bucket, key = parse_path(path)
+    s3_client: S3Client = client("s3")

try:
if needs_credentials:
-            s3 = get_session(path).resource("s3")
+            s3_client = get_session(path).client("s3")

if path.endswith("/"):
-            bucket_name = bucket_name_from_path(s3_path)
-            bucket = s3.Bucket(bucket_name)
-            # MaxKeys limits to 1 object in the response
-            objects = bucket.objects.filter(Prefix=key, MaxKeys=1)
+            objects = s3_client.list_objects_v2(Bucket=bucket, Prefix=key, MaxKeys=1)

if len(list(objects)) > 0:
return True
return False

-        # load() fetch the metadata, not the data. Calls a `head` behind the scene.
-        s3.Object(s3_path, key).load()
+        s3_client.head_object(Bucket=bucket, Key=key)

return True
-    except s3.meta.client.exceptions.NoSuchBucket as nsb:
+    except s3_client.exceptions.NoSuchBucket as nsb:
get_log().debug("s3_bucket_not_found", path=path, info=f"The specified bucket does not seem to exist: {nsb}")
return False
-    except s3.meta.client.exceptions.ClientError as ce:
+    except s3_client.exceptions.ClientError as ce:
if not needs_credentials and ce.response["Error"]["Code"] == "AccessDenied":
get_log().debug("read_s3_needs_credentials", path=path)
return exists(path, True)
@@ -136,7 +131,7 @@ def exists(path: str, needs_credentials: bool = False) -> bool:
get_log().debug("s3_key_not_found", path=path, info=f"The specified key does not seem to exist: {ce}")
return False
get_log().error("s3_client_error", path=path, error=f"ClientError raised: {ce}")
-        raise ce
+        raise


def bucket_name_from_path(path: str) -> str:
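
One observation on the `exists()` hunk above: `list_objects_v2` returns a plain dict, so the retained `if len(list(objects)) > 0:` check now iterates the response dict's top-level keys (which are always present) rather than the matched objects. A hypothetical helper that reads the response's `KeyCount` field instead would look like this (the function name and region literal are illustrative):

```python
from boto3 import client


def prefix_exists(bucket: str, prefix: str) -> bool:
    """Sketch: True if at least one object key starts with the given prefix."""
    s3_client = client("s3", region_name="us-east-1")
    response = s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix, MaxKeys=1)
    # KeyCount reports how many keys were returned (0 or 1 here, given MaxKeys=1).
    return response.get("KeyCount", 0) > 0
```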
77 changes: 34 additions & 43 deletions scripts/files/tests/fs_s3_test.py
@@ -1,6 +1,6 @@
import json

-from boto3 import client, resource
+from boto3 import client
from botocore.exceptions import ClientError
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
@@ -18,13 +18,12 @@

@mock_aws
def test_write(subtests: SubTests) -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")

write("s3://testbucket/test.file", b"test content")

-    resp = boto3_client.get_object(Bucket="testbucket", Key="test.file")
+    resp = s3_client.get_object(Bucket="testbucket", Key="test.file")
with subtests.test():
assert resp["Body"].read() == b"test content"

@@ -34,12 +33,11 @@ def test_write(subtests: SubTests) -> None:

@mock_aws
def test_write_content_type(subtests: SubTests) -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")

write("s3://testbucket/test.tiff", b"test content", ContentType.GEOTIFF.value)
-    resp = boto3_client.get_object(Bucket="testbucket", Key="test.tiff")
+    resp = s3_client.get_object(Bucket="testbucket", Key="test.tiff")
with subtests.test():
assert resp["Body"].read() == b"test content"

@@ -49,23 +47,21 @@

@mock_aws
def test_write_multihash_as_metadata(subtests: SubTests) -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")

write("s3://testbucket/test.tiff", b"test content", ContentType.GEOTIFF.value)
-    resp = boto3_client.get_object(Bucket="testbucket", Key="test.tiff")
+    resp = s3_client.get_object(Bucket="testbucket", Key="test.tiff")

with subtests.test():
assert resp["Metadata"]["multihash"] == "12206ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72"


@mock_aws
def test_read() -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
-    boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")
+    s3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")

content = read("s3://testbucket/test.file")

@@ -84,8 +80,8 @@ def test_read_bucket_not_found(capsys: CaptureFixture[str]) -> None:

@mock_aws
def test_read_key_not_found(capsys: CaptureFixture[str]) -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")

with raises(ClientError):
read("s3://testbucket/test.file")
@@ -96,10 +92,9 @@ def test_read_key_not_found(capsys: CaptureFixture[str]) -> None:

@mock_aws
def test_exists() -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
-    boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")
+    s3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")

file_exists = exists("s3://testbucket/test.file")

@@ -108,10 +103,9 @@ def test_exists() -> None:

@mock_aws
def test_directory_exists() -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
-    boto3_client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")
+    s3_client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content")

directory_exists = exists("s3://testbucket/hello/")

@@ -132,10 +126,9 @@ def test_exists_bucket_not_exists(capsys: CaptureFixture[str], subtests: SubTest

@mock_aws
def test_exists_object_not_exists() -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
-    boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")
+    s3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")

file_exists = exists("s3://testbucket/test.file")

@@ -144,10 +137,9 @@ def test_exists_object_not_exists() -> None:

@mock_aws
def test_exists_object_starting_with_not_exists() -> None:
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket="testbucket")
-    boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket="testbucket")
+    s3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")

file_exists = exists("s3://testbucket/hello/another.fi")

@@ -157,14 +149,13 @@ def test_exists_object_starting_with_not_exists() -> None:
@mock_aws
def test_list_files_in_uri(subtests: SubTests) -> None:
bucket_name = "testbucket"
-    s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
-    boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
-    s3.create_bucket(Bucket=bucket_name)
-    boto3_client.put_object(Bucket=bucket_name, Key="data/collection.json", Body=b"")
-    boto3_client.put_object(Bucket=bucket_name, Key="data/image.tiff", Body=b"")
-    boto3_client.put_object(Bucket=bucket_name, Key="data/image_meta.xml", Body=b"")
+    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
+    s3_client.create_bucket(Bucket=bucket_name)
+    s3_client.put_object(Bucket=bucket_name, Key="data/collection.json", Body=b"")
+    s3_client.put_object(Bucket=bucket_name, Key="data/image.tiff", Body=b"")
+    s3_client.put_object(Bucket=bucket_name, Key="data/image_meta.xml", Body=b"")

-    files = list_files_in_uri(f"s3://{bucket_name}/data/", [".json", "_meta.xml"], boto3_client)
+    files = list_files_in_uri(f"s3://{bucket_name}/data/", [".json", "_meta.xml"], s3_client)

with subtests.test():
assert len(files) == 2