Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Use boto3 clients rather than resources TDE-1034 #1172

Merged
merged 5 commits into from
Nov 24, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions scripts/aws/aws_helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from os import environ
from time import sleep
from typing import Any, NamedTuple
from typing import TYPE_CHECKING, Any, NamedTuple
from urllib.parse import urlparse

from boto3 import Session
Expand All @@ -11,6 +11,11 @@

from scripts.aws.aws_credential_source import CredentialSource

if TYPE_CHECKING:
from mypy_boto3_s3 import S3Client
else:
S3Client = dict

S3Path = NamedTuple("S3Path", [("bucket", str), ("key", str)])

aws_profile = environ.get("AWS_PROFILE")
Expand All @@ -26,9 +31,9 @@

def _init_roles() -> None:
"""Load bucket to roleArn mapping for LINZ internal buckets from SSM"""
s3 = session.resource("s3")
config_path = parse_path(bucket_config_path)
content_object = s3.Object(config_path.bucket, config_path.key)
s3_client: S3Client = session.client("s3")
bucket, key = parse_path(bucket_config_path)
content_object = s3_client.Object(bucket, key)
file_content = content_object.get()["Body"].read().decode("utf-8")
json_content = json.loads(file_content)

Expand Down
6 changes: 3 additions & 3 deletions scripts/aws/tests/aws_helper_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

def test_parse_path_s3(subtests: SubTests) -> None:
s3_path = "s3://bucket-name/path/to/the/file.test"
path = parse_path(s3_path)
bucket, key = parse_path(s3_path)

with subtests.test():
assert path.bucket == "bucket-name"
assert bucket == "bucket-name"

with subtests.test():
assert path.key == "path/to/the/file.test"
assert key == "path/to/the/file.test"


def test_parse_path_local() -> None:
Expand Down
9 changes: 7 additions & 2 deletions scripts/collection_from_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import os
from argparse import Namespace
from typing import List
from typing import TYPE_CHECKING, List

import shapely.geometry
import shapely.ops
Expand All @@ -17,6 +17,11 @@
from scripts.stac.imagery.create_stac import create_collection
from scripts.stac.imagery.metadata_constants import DATA_CATEGORIES, HUMAN_READABLE_REGIONS, CollectionMetadata

if TYPE_CHECKING:
from mypy_boto3_s3 import S3Client
else:
S3Client = GetObjectOutputTypeDef = dict


class NoItemsError(Exception):
pass
Expand Down Expand Up @@ -114,7 +119,7 @@ def main(args: List[str] | None = None) -> None:
msg = f"uri is not a s3 path: {uri}"
raise argparse.ArgumentTypeError(msg)

s3_client = client("s3")
s3_client: S3Client = client("s3")

files_to_read = list_files_in_uri(uri, [SUFFIX_JSON, SUFFIX_FOOTPRINT], s3_client)

Expand Down
4 changes: 2 additions & 2 deletions scripts/files/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from typing import TYPE_CHECKING

from boto3 import resource
from boto3 import client
from linz_logger import get_log

from scripts.aws.aws_helper import is_s3
Expand Down Expand Up @@ -47,7 +47,7 @@ def read(path: str) -> bytes:
try:
return fs_s3.read(path)
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html#parsing-error-responses-and-catching-exceptions-from-aws-services
except resource("s3").meta.client.exceptions.ClientError as ce:
except client("s3").exceptions.ClientError as ce:
# Error Code can be found here:
# https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList
if ce.response["Error"]["Code"] == "NoSuchKey":
Expand Down
65 changes: 30 additions & 35 deletions scripts/files/fs_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from datetime import datetime
from typing import TYPE_CHECKING, Any

from boto3 import client, resource
from botocore.exceptions import ClientError
from boto3 import client
from linz_logger import get_log

from scripts.aws.aws_helper import get_session, parse_path
Expand All @@ -31,19 +30,19 @@ def write(destination: str, source: bytes, content_type: str | None = None) -> N
if source is None:
get_log().error("write_s3_source_none", path=destination, error="The 'source' is None.")
raise Exception("The 'source' is None.")
s3_path = parse_path(destination)
key = s3_path.key
s3 = resource("s3")
bucket, key = parse_path(destination)
s3_client: S3Client = client("s3")
multihash = checksum.multihash_as_hex(source)

try:
s3_object = s3.Object(s3_path.bucket, key)
if content_type:
s3_object.put(Body=source, ContentType=content_type, Metadata={"multihash": multihash})
s3_client.put_object(
Bucket=bucket, Key=key, Body=source, ContentType=content_type, Metadata={"multihash": multihash}
)
else:
s3_object.put(Body=source, Metadata={"multihash": multihash})
s3_client.put_object(Bucket=bucket, Key=key, Body=source, Metadata={"multihash": multihash})
get_log().debug("write_s3_success", path=destination, duration=time_in_ms() - start_time)
except ClientError as ce:
except s3_client.exceptions.ClientError as ce:
get_log().error("write_s3_error", path=destination, error=f"Unable to write the file: {ce}")
raise ce

Expand All @@ -56,34 +55,33 @@ def read(path: str, needs_credentials: bool = False) -> bytes:
needs_credentials: Tells if credentials are needed. Defaults to False.

Raises:
ce: botocore ClientError
ClientError

Returns:
The file in bytes.
"""
start_time = time_in_ms()
s3_path = parse_path(path)
key = s3_path.key
s3 = resource("s3")
bucket, key = parse_path(path)
s3_client: S3Client = client("s3")

try:
if needs_credentials:
s3 = get_session(path).resource("s3")
s3_client = get_session(path).client("s3")

s3_object = s3.Object(s3_path.bucket, key)
file: bytes = s3_object.get()["Body"].read()
except s3.meta.client.exceptions.NoSuchBucket as nsb:
s3_object: GetObjectOutputTypeDef = s3_client.get_object(Bucket=bucket, Key=key)
file: bytes = s3_object["Body"].read()
except s3_client.exceptions.NoSuchBucket as nsb:
get_log().error("s3_bucket_not_found", path=path, error=f"The specified bucket does not seem to exist: {nsb}")
raise nsb
except s3.meta.client.exceptions.NoSuchKey as nsk:
raise
except s3_client.exceptions.NoSuchKey as nsk:
get_log().error("s3_key_not_found", path=path, error=f"The specified file does not seem to exist: {nsk}")
raise nsk
except s3.meta.client.exceptions.ClientError as ce:
raise
except s3_client.exceptions.ClientError as ce:
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html#parsing-error-responses-and-catching-exceptions-from-aws-services
if not needs_credentials and ce.response["Error"]["Code"] == "AccessDenied":
get_log().debug("read_s3_needs_credentials", path=path)
return read(path, True)
raise ce
raise

get_log().debug("read_s3_success", path=path, duration=time_in_ms() - start_time)
return file
Expand All @@ -97,36 +95,33 @@ def exists(path: str, needs_credentials: bool = False) -> bool:
needs_credentials: if acces to object needs credentials. Defaults to False.

Raises:
ce: ClientError
ce: s3_client.exceptions.ClientError
l0b0 marked this conversation as resolved.
Show resolved Hide resolved
nsb: NoSuchBucket

Returns:
True if the S3 Object exists
"""
s3_path, key = parse_path(path)
s3 = resource("s3")
bucket, key = parse_path(path)
s3_client: S3Client = client("s3")

try:
if needs_credentials:
s3 = get_session(path).resource("s3")
s3_client = get_session(path).client("s3")

if path.endswith("/"):
bucket_name = bucket_name_from_path(s3_path)
bucket = s3.Bucket(bucket_name)
# MaxKeys limits to 1 object in the response
objects = bucket.objects.filter(Prefix=key, MaxKeys=1)

objects = s3_client.list_objects_v2(Bucket=bucket, Prefix=key, MaxKeys=1)
if len(list(objects)) > 0:
return True
return False

# load() fetch the metadata, not the data. Calls a `head` behind the scene.
s3.Object(s3_path, key).load()
s3_client.head_object(Bucket=bucket, Key=key)

return True
except s3.meta.client.exceptions.NoSuchBucket as nsb:
except s3_client.exceptions.NoSuchBucket as nsb:
get_log().debug("s3_bucket_not_found", path=path, info=f"The specified bucket does not seem to exist: {nsb}")
return False
except s3.meta.client.exceptions.ClientError as ce:
except s3_client.exceptions.ClientError as ce:
if not needs_credentials and ce.response["Error"]["Code"] == "AccessDenied":
get_log().debug("read_s3_needs_credentials", path=path)
return exists(path, True)
Expand All @@ -136,7 +131,7 @@ def exists(path: str, needs_credentials: bool = False) -> bool:
get_log().debug("s3_key_not_found", path=path, info=f"The specified key does not seem to exist: {ce}")
return False
get_log().error("s3_client_error", path=path, error=f"ClientError raised: {ce}")
raise ce
raise


def bucket_name_from_path(path: str) -> str:
Expand Down
77 changes: 34 additions & 43 deletions scripts/files/tests/fs_s3_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from boto3 import client, resource
from boto3 import client
from botocore.exceptions import ClientError
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
Expand All @@ -18,13 +18,12 @@

@mock_aws
def test_write(subtests: SubTests) -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")

write("s3://testbucket/test.file", b"test content")

resp = boto3_client.get_object(Bucket="testbucket", Key="test.file")
resp = s3_client.get_object(Bucket="testbucket", Key="test.file")
with subtests.test():
assert resp["Body"].read() == b"test content"

Expand All @@ -34,12 +33,11 @@ def test_write(subtests: SubTests) -> None:

@mock_aws
def test_write_content_type(subtests: SubTests) -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")

write("s3://testbucket/test.tiff", b"test content", ContentType.GEOTIFF.value)
resp = boto3_client.get_object(Bucket="testbucket", Key="test.tiff")
resp = s3_client.get_object(Bucket="testbucket", Key="test.tiff")
with subtests.test():
assert resp["Body"].read() == b"test content"

Expand All @@ -49,23 +47,21 @@ def test_write_content_type(subtests: SubTests) -> None:

@mock_aws
def test_write_multihash_as_metadata(subtests: SubTests) -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")

write("s3://testbucket/test.tiff", b"test content", ContentType.GEOTIFF.value)
resp = boto3_client.get_object(Bucket="testbucket", Key="test.tiff")
resp = s3_client.get_object(Bucket="testbucket", Key="test.tiff")

with subtests.test():
assert resp["Metadata"]["multihash"] == "12206ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72"


@mock_aws
def test_read() -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")
s3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")

content = read("s3://testbucket/test.file")

Expand All @@ -84,8 +80,8 @@ def test_read_bucket_not_found(capsys: CaptureFixture[str]) -> None:

@mock_aws
def test_read_key_not_found(capsys: CaptureFixture[str]) -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")

with raises(ClientError):
read("s3://testbucket/test.file")
Expand All @@ -96,10 +92,9 @@ def test_read_key_not_found(capsys: CaptureFixture[str]) -> None:

@mock_aws
def test_exists() -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")
s3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content")

file_exists = exists("s3://testbucket/test.file")

Expand All @@ -108,10 +103,9 @@ def test_exists() -> None:

@mock_aws
def test_directory_exists() -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
boto3_client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")
s3_client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content")

directory_exists = exists("s3://testbucket/hello/")

Expand All @@ -132,10 +126,9 @@ def test_exists_bucket_not_exists(capsys: CaptureFixture[str], subtests: SubTest

@mock_aws
def test_exists_object_not_exists() -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")
s3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")

file_exists = exists("s3://testbucket/test.file")

Expand All @@ -144,10 +137,9 @@ def test_exists_object_not_exists() -> None:

@mock_aws
def test_exists_object_starting_with_not_exists() -> None:
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket="testbucket")
boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket="testbucket")
s3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content")

file_exists = exists("s3://testbucket/hello/another.fi")

Expand All @@ -157,14 +149,13 @@ def test_exists_object_starting_with_not_exists() -> None:
@mock_aws
def test_list_files_in_uri(subtests: SubTests) -> None:
bucket_name = "testbucket"
s3 = resource("s3", region_name=DEFAULT_REGION_NAME)
boto3_client = client("s3", region_name=DEFAULT_REGION_NAME)
s3.create_bucket(Bucket=bucket_name)
boto3_client.put_object(Bucket=bucket_name, Key="data/collection.json", Body=b"")
boto3_client.put_object(Bucket=bucket_name, Key="data/image.tiff", Body=b"")
boto3_client.put_object(Bucket=bucket_name, Key="data/image_meta.xml", Body=b"")

files = list_files_in_uri(f"s3://{bucket_name}/data/", [".json", "_meta.xml"], boto3_client)
s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
s3_client.create_bucket(Bucket=bucket_name)
s3_client.put_object(Bucket=bucket_name, Key="data/collection.json", Body=b"")
s3_client.put_object(Bucket=bucket_name, Key="data/image.tiff", Body=b"")
s3_client.put_object(Bucket=bucket_name, Key="data/image_meta.xml", Body=b"")

files = list_files_in_uri(f"s3://{bucket_name}/data/", [".json", "_meta.xml"], s3_client)

with subtests.test():
assert len(files) == 2
Expand Down
Loading
Loading