diff --git a/contrib/admin/cleanup_aws_resources.py b/contrib/admin/cleanup_aws_resources.py
index e626d0e4f4..00c9af6dc9 100755
--- a/contrib/admin/cleanup_aws_resources.py
+++ b/contrib/admin/cleanup_aws_resources.py
@@ -22,7 +22,8 @@
 from src.toil.lib import aws
 from src.toil.lib.aws import session
-from src.toil.lib.aws.utils import delete_iam_role, delete_iam_instance_profile, delete_s3_bucket, delete_sdb_domain
+from src.toil.lib.aws.utils import delete_iam_role, delete_iam_instance_profile, delete_sdb_domain
+from src.toil.lib.aws.s3 import delete_s3_bucket
 from src.toil.lib.generatedEC2Lists import regionDict
 
 # put us-west-2 first as our default test region; that way anything with a universal region shows there
diff --git a/src/toil/jobStores/aws/jobStore.py b/src/toil/jobStores/aws/jobStore.py
index 74958a2d77..27514cd062 100644
--- a/src/toil/jobStores/aws/jobStore.py
+++ b/src/toil/jobStores/aws/jobStore.py
@@ -37,10 +37,17 @@
 from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
 
 from botocore.exceptions import ClientError
+from mypy_boto3_s3.service_resource import Bucket
+from mypy_boto3_sdb import SimpleDBClient
+from mypy_boto3_sdb.type_defs import ReplaceableItemTypeDef, ReplaceableAttributeTypeDef, SelectResultTypeDef, ItemTypeDef, AttributeTypeDef, DeletableItemTypeDef, UpdateConditionTypeDef
 
 import toil.lib.encryption as encryption
 from toil.fileStores import FileID
 from toil.job import Job, JobDescription
+from toil.lib.aws import tags_from_env
+from toil.lib.aws.s3 import create_s3_bucket, delete_s3_bucket
+
 from toil.jobStores.abstractJobStore import (
     AbstractJobStore,
     ConcurrentFileModificationException,
@@ -62,12 +69,10 @@
     uploadFromPath,
 )
 from toil.jobStores.utils import ReadablePipe, ReadableTransformingPipe, WritablePipe
-from toil.lib.aws import build_tag_dict_from_env
 from toil.lib.aws.session import establish_boto3_session
 from toil.lib.aws.utils import (
     NoBucketLocationError,
     boto3_pager,
-    create_s3_bucket,
     enable_public_objects,
     flatten_tags,
     get_bucket_region,
@@ -77,6 +82,7 @@
     retry_s3,
     retryable_s3_errors,
 )
+
 from toil.lib.compatibility import compat_bytes
 from toil.lib.ec2nodes import EC2Regions
 from toil.lib.exceptions import panic
@@ -834,9 +840,7 @@ def bucket_retry_predicate(error):
                     bucketExisted = False
                     logger.debug("Bucket '%s' does not exist.", bucket_name)
                     if create:
-                        bucket = create_s3_bucket(
-                            self.s3_resource, bucket_name, self.region
-                        )
+                        bucket = create_s3_bucket(self.s3_resource, bucket_name, self.region)
                         # Wait until the bucket exists before checking the region and adding tags
                         bucket.wait_until_exists()
@@ -845,11 +849,10 @@ def bucket_retry_predicate(error):
                         # produce an S3ResponseError with code
                         # NoSuchBucket. We let that kick us back up to the
                         # main retry loop.
-                        assert (
-                            get_bucket_region(bucket_name) == self.region
+                        assert (get_bucket_region(bucket_name) == self.region
                         ), f"bucket_name: {bucket_name}, {get_bucket_region(bucket_name)} != {self.region}"
 
-                        tags = build_tag_dict_from_env()
+                        tags = tags_from_env()
 
                         if tags:
                             flat_tags = flatten_tags(tags)
@@ -1742,7 +1745,7 @@ def destroy(self):
         # TODO: Add other failure cases to be ignored here.
         self._registered = None
         if self.files_bucket is not None:
-            self._delete_bucket(self.files_bucket)
+            delete_s3_bucket(s3_resource=s3_boto3_resource, bucket_name=self.files_bucket.name)
             self.files_bucket = None
         for name in 'files_domain_name', 'jobs_domain_name':
             domainName = getattr(self, name)
@@ -1760,30 +1763,6 @@ def _delete_domain(self, domainName):
                 if not no_such_sdb_domain(e):
                     raise
 
-    @staticmethod
-    def _delete_bucket(bucket):
-        """
-        :param bucket: S3.Bucket
-        """
-        for attempt in retry_s3():
-            with attempt:
-                try:
-                    uploads = s3_boto3_client.list_multipart_uploads(Bucket=bucket.name).get('Uploads')
-                    if uploads:
-                        for u in uploads:
-                            s3_boto3_client.abort_multipart_upload(Bucket=bucket.name,
-                                                                   Key=u["Key"],
-                                                                   UploadId=u["UploadId"])
-
-                    bucket.objects.all().delete()
-                    bucket.object_versions.delete()
-                    bucket.delete()
-                except s3_boto3_client.exceptions.NoSuchBucket:
-                    pass
-                except ClientError as e:
-                    if get_error_status(e) != 404:
-                        raise
-
 aRepr = reprlib.Repr()
 aRepr.maxstring = 38  # so UUIDs don't get truncated (36 for UUID plus 2 for quotes)
diff --git a/src/toil/lib/aws/__init__.py b/src/toil/lib/aws/__init__.py
index 4f757e0235..474688ccf4 100644
--- a/src/toil/lib/aws/__init__.py
+++ b/src/toil/lib/aws/__init__.py
@@ -176,6 +176,7 @@ def file_begins_with(path, prefix):
     except (URLError, socket.timeout, HTTPException):
         return False
 
+
 def running_on_ecs() -> bool:
     """
    Return True if we are currently running on Amazon ECS, and false otherwise.
    """
@@ -183,22 +184,17 @@ def running_on_ecs() -> bool:
     # We only care about relatively current ECS
     return 'ECS_CONTAINER_METADATA_URI_V4' in os.environ
 
-def build_tag_dict_from_env(environment: MutableMapping[str, str] = os.environ) -> Dict[str, str]:
-    tags = dict()
-    owner_tag = environment.get('TOIL_OWNER_TAG')
+
+def tags_from_env() -> Dict[str, str]:
+    try:
+        tags = json.loads(os.environ.get('TOIL_AWS_TAGS', '{}'))
+        if not isinstance(tags, dict):
+            # Valid JSON that is not an object (e.g. "231") must also be rejected,
+            # or we would silently return a non-dict here.
+            raise ValueError
+    except (json.decoder.JSONDecodeError, ValueError):
+        logger.error('TOIL_AWS_TAGS must be in JSON format: {"key" : "value", ...}')
+        exit(1)
+
+    # TODO: Remove TOIL_OWNER_TAG and only use TOIL_AWS_TAGS.
+    owner_tag = os.environ.get('TOIL_OWNER_TAG')
     if owner_tag:
         tags.update({'Owner': owner_tag})
-    user_tags = environment.get('TOIL_AWS_TAGS')
-    if user_tags:
-        try:
-            json_user_tags = json.loads(user_tags)
-            if isinstance(json_user_tags, dict):
-                tags.update(json.loads(user_tags))
-            else:
-                logger.error('TOIL_AWS_TAGS must be in JSON format: {"key" : "value", ...}')
-                exit(1)
-        except json.decoder.JSONDecodeError:
-            logger.error('TOIL_AWS_TAGS must be in JSON format: {"key" : "value", ...}')
-            exit(1)
     return tags
diff --git a/src/toil/lib/aws/s3.py b/src/toil/lib/aws/s3.py
new file mode 100644
index 0000000000..10889c0139
--- /dev/null
+++ b/src/toil/lib/aws/s3.py
@@ -0,0 +1,145 @@
+# Copyright (C) 2015-2023 Regents of the University of California
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
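+
+# The docstring below carries a usage sketch for the new module. The example
+# is illustrative only: the bucket name, region, and tags are placeholders,
+# and AWS credentials are assumed to be configured already.
+"""
+Creation and deletion of Toil-managed S3 buckets.
+
+All S3 bucket creation and deletion in Toil should go through this module,
+for example:
+
+    from toil.lib.aws.session import establish_boto3_session
+    from toil.lib.aws.s3 import create_s3_bucket, delete_s3_bucket
+
+    resource = establish_boto3_session().resource('s3', region_name='us-west-2')
+    bucket = create_s3_bucket(resource, 'example-toil-bucket', 'us-west-2',
+                              tags={'Owner': 'me'}, public=False)
+    delete_s3_bucket(s3_resource=resource, bucket_name='example-toil-bucket')
+"""
+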
+import logging
+import sys
+
+from typing import (Any,
+                    Dict,
+                    List,
+                    Optional,
+                    Union,
+                    cast)
+
+from toil.lib.retry import retry, get_error_status
+from toil.lib.misc import printq
+from . import tags_from_env
+from toil.lib.aws.utils import enable_public_objects, flatten_tags
+
+if sys.version_info >= (3, 8):
+    from typing import Literal
+else:
+    from typing_extensions import Literal
+
+try:
+    from boto.exception import BotoServerError, S3ResponseError
+    from botocore.exceptions import ClientError
+    from mypy_boto3_s3 import S3ServiceResource
+    from mypy_boto3_s3.literals import BucketLocationConstraintType
+    from mypy_boto3_s3.service_resource import Bucket
+except ImportError:
+    BotoServerError = Exception  # type: ignore
+    S3ResponseError = Exception  # type: ignore
+    ClientError = Exception  # type: ignore
+    # AWS/boto extra is not installed
+
+
+logger = logging.getLogger(__name__)
+
+
+@retry(errors=[BotoServerError, S3ResponseError, ClientError])
+def create_s3_bucket(
+    s3_resource: "S3ServiceResource",
+    bucket_name: str,
+    region: Union["BucketLocationConstraintType", Literal["us-east-1"]],
+    tags: Optional[Dict[str, str]] = None,
+    public: bool = True
+) -> "Bucket":
+    """
+    Create an AWS S3 bucket, using the given Boto3 S3 session, with the
+    given name, in the given region.
+
+    Supports the us-east-1 region, where bucket creation is special.
+
+    *ALL* S3 bucket creation should use this function.
+    """
+    logger.debug("Creating bucket '%s' in region %s.", bucket_name, region)
+    if region == "us-east-1":  # see https://github.com/boto/boto3/issues/125
+        bucket = s3_resource.create_bucket(Bucket=bucket_name)
+    else:
+        bucket = s3_resource.create_bucket(
+            Bucket=bucket_name,
+            CreateBucketConfiguration={"LocationConstraint": region},
+        )
+    # wait until the bucket exists before adding tags
+    bucket.wait_until_exists()
+
+    tags = tags_from_env() if tags is None else tags
+    bucket_tagging = s3_resource.BucketTagging(bucket_name)
+    bucket_tagging.put(Tagging={'TagSet': flatten_tags(tags)})  # type: ignore
+
+    # enabling public objects is the historical default
+    if public:
+        enable_public_objects(bucket_name)
+
+    return bucket
+
+
+@retry(errors=[BotoServerError, S3ResponseError, ClientError])
+def delete_s3_bucket(
+    s3_resource: "S3ServiceResource",
+    bucket_name: str,
+    quiet: bool = True
+) -> None:
+    """
+    Delete the bucket with 'bucket_name'.
+
+    Note: 'quiet' is False when called from the clean-up utility script
+    (contrib/admin/cleanup_aws_resources.py), which prints progress rather
+    than logging. Logging should be used for all other internal Toil usage.
+    """
+    assert isinstance(bucket_name, str), f'{bucket_name} is not a string ({type(bucket_name)}).'
+    logger.debug("Deleting bucket '%s'.", bucket_name)
+    printq(f'\n * Deleting s3 bucket: {bucket_name}\n\n', quiet)
+
+    s3_client = s3_resource.meta.client
+
+    try:
+        for u in s3_client.list_multipart_uploads(Bucket=bucket_name).get('Uploads', []):
+            s3_client.abort_multipart_upload(
+                Bucket=bucket_name,
+                Key=u["Key"],
+                UploadId=u["UploadId"]
+            )
+
+        paginator = s3_client.get_paginator('list_object_versions')
+        for response in paginator.paginate(Bucket=bucket_name):
+            # Versions and delete markers can both go in here to be deleted.
+            # They both have Key and VersionId, but there's no shared base type
+            # defined for them in the stubs to express that. See <...>. So we
+            # have to do gymnastics to get them into the same list.
+            to_delete: List[Dict[str, Any]] = cast(List[Dict[str, Any]], response.get('Versions', [])) + \
+                                              cast(List[Dict[str, Any]], response.get('DeleteMarkers', []))
+            for entry in to_delete:
+                printq(f"    Deleting {entry['Key']} version {entry['VersionId']}", quiet)
+                s3_client.delete_object(
+                    Bucket=bucket_name,
+                    Key=entry['Key'],
+                    VersionId=entry['VersionId']
+                )
+        bucket = s3_resource.Bucket(bucket_name)
+        bucket.objects.all().delete()
+        bucket.object_versions.delete()
+        bucket.delete()
+        printq(f'\n * Deleted s3 bucket successfully: {bucket_name}\n\n', quiet)
+        logger.debug("Deleted s3 bucket successfully '%s'.", bucket_name)
+    except s3_client.exceptions.NoSuchBucket:
+        printq(f'\n * S3 bucket no longer exists: {bucket_name}\n\n', quiet)
+        logger.debug("S3 bucket no longer exists '%s'.", bucket_name)
+    except ClientError as e:
+        if get_error_status(e) != 404:
+            raise
+        printq(f'\n * S3 bucket no longer exists: {bucket_name}\n\n', quiet)
+        logger.debug("S3 bucket no longer exists '%s'.", bucket_name)
diff --git a/src/toil/lib/aws/session.py b/src/toil/lib/aws/session.py
index dc8b837d4d..bd01a23803 100644
--- a/src/toil/lib/aws/session.py
+++ b/src/toil/lib/aws/session.py
@@ -15,6 +15,7 @@
 import logging
 import os
 import threading
+
 from typing import (
     TYPE_CHECKING,
     Dict,
@@ -30,6 +31,8 @@
 import boto3
 import boto3.resources.base
 import botocore
+
 from boto3 import Session
 from botocore.client import Config
 from botocore.session import get_session
@@ -61,6 +64,7 @@
 # initializing Boto3 (or Boto2) things at a time.
 _init_lock = threading.RLock()
 
+
 def _new_boto3_session(region_name: Optional[str] = None) -> Session:
     """
     This is the One True Place where new Boto3 sessions should be made, and
@@ -81,6 +85,7 @@ def _new_boto3_session(region_name: Optional[str] = None) -> Session:
     return Session(botocore_session=botocore_session, region_name=region_name, profile_name=os.environ.get("TOIL_AWS_PROFILE", None))
 
+
 class AWSConnectionManager:
     """
     Class that represents a connection to AWS. Caches Boto 3 and Boto 2 objects
@@ -234,7 +239,6 @@
         config: Optional[Config] = None,
     ) -> "AutoScalingClient": ...
 
-
     def client(self, region: Optional[str], service_name: Literal["ec2", "iam", "s3", "sts", "sdb", "autoscaling"], endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> botocore.client.BaseClient:
         """
diff --git a/src/toil/lib/aws/utils.py b/src/toil/lib/aws/utils.py
index 53440bfd4e..9e0e2d1ca0 100644
--- a/src/toil/lib/aws/utils.py
+++ b/src/toil/lib/aws/utils.py
@@ -31,6 +31,7 @@
 )
 from urllib.parse import ParseResult
 
+from mypy_boto3_sdb.type_defs import AttributeTypeDef
 from toil.lib.aws import AWSRegionName, AWSServerErrors, session
 from toil.lib.misc import printq
 from toil.lib.retry import (
@@ -77,6 +78,7 @@
     'EC2ThrottledException',
 ]
 
+
 @retry(errors=[AWSServerErrors])
 def delete_iam_role(
     role_name: str, region: Optional[str] = None, quiet: bool = True
@@ -137,12 +139,12 @@ def connection_reset(e: Exception) -> bool:
     # errno is listed as 104. To be safe, we check for both:
     return isinstance(e, socket.error) and e.errno in (errno.ECONNRESET, 104)
 
+
 def connection_error(e: Exception) -> bool:
     """
     Return True if an error represents a failure to make a network connection.
""" - return (connection_reset(e) - or isinstance(e, EndpointConnectionError)) + return (connection_reset(e) or isinstance(e, EndpointConnectionError)) # TODO: Replace with: @retry and ErrorCondition @@ -166,58 +168,6 @@ def retry_s3(delays: Iterable[float] = DEFAULT_DELAYS, timeout: float = DEFAULT_ """ return old_retry(delays=delays, timeout=timeout, predicate=predicate) -@retry(errors=[AWSServerErrors]) -def delete_s3_bucket( - s3_resource: "S3ServiceResource", - bucket: str, - quiet: bool = True -) -> None: - """ - Delete the given S3 bucket. - """ - printq(f'Deleting s3 bucket: {bucket}', quiet) - - paginator = s3_resource.meta.client.get_paginator('list_object_versions') - try: - for response in paginator.paginate(Bucket=bucket): - # Versions and delete markers can both go in here to be deleted. - # They both have Key and VersionId, but there's no shared base type - # defined for them in the stubs to express that. See - # . So we - # have to do gymnastics to get them into the same list. - to_delete: List[Dict[str, Any]] = cast(List[Dict[str, Any]], response.get('Versions', [])) + \ - cast(List[Dict[str, Any]], response.get('DeleteMarkers', [])) - for entry in to_delete: - printq(f" Deleting {entry['Key']} version {entry['VersionId']}", quiet) - s3_resource.meta.client.delete_object(Bucket=bucket, Key=entry['Key'], VersionId=entry['VersionId']) - s3_resource.Bucket(bucket).delete() - printq(f'\n * Deleted s3 bucket successfully: {bucket}\n\n', quiet) - except s3_resource.meta.client.exceptions.NoSuchBucket: - printq(f'\n * S3 bucket no longer exists: {bucket}\n\n', quiet) - - -def create_s3_bucket( - s3_resource: "S3ServiceResource", - bucket_name: str, - region: AWSRegionName, -) -> "Bucket": - """ - Create an AWS S3 bucket, using the given Boto3 S3 session, with the - given name, in the given region. - - Supports the us-east-1 region, where bucket creation is special. - - *ALL* S3 bucket creation should use this function. - """ - logger.info("Creating bucket '%s' in region %s.", bucket_name, region) - if region == "us-east-1": # see https://github.com/boto/boto3/issues/125 - bucket = s3_resource.create_bucket(Bucket=bucket_name) - else: - bucket = s3_resource.create_bucket( - Bucket=bucket_name, - CreateBucketConfiguration={"LocationConstraint": region}, - ) - return bucket @retry(errors=[ClientError]) def enable_public_objects(bucket_name: str) -> None: @@ -352,9 +302,11 @@ def attempt_head_bucket() -> Optional[str]: # If we get here we ran out of attempts. raise NoBucketLocationError("Could not get bucket location: " + "\n".join(error_messages)) from last_error + def region_to_bucket_location(region: str) -> str: return '' if region == 'us-east-1' else region + def bucket_location_to_region(location: Optional[str]) -> str: return "us-east-1" if location == "" or location is None else location @@ -462,10 +414,9 @@ def list_objects_for_url(url: ParseResult) -> List[str]: logger.debug('Found in %s items: %s', url, listing) return listing + def flatten_tags(tags: Dict[str, str]) -> List[Dict[str, str]]: - """ - Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts. 
- """ + """Convert tags from a key to value dict into a list of 'Key': xxx, 'Value': xxx dicts.""" return [{'Key': k, 'Value': v} for k, v in tags.items()] diff --git a/src/toil/provisioners/aws/awsProvisioner.py b/src/toil/provisioners/aws/awsProvisioner.py index 33621c52e7..8afaa42b71 100644 --- a/src/toil/provisioners/aws/awsProvisioner.py +++ b/src/toil/provisioners/aws/awsProvisioner.py @@ -40,7 +40,6 @@ cast, ) from urllib.parse import unquote - # We need these to exist as attributes we can get off of the boto object from botocore.exceptions import ClientError from mypy_extensions import KwArg, VarArg @@ -53,8 +52,9 @@ policy_permissions_allow, ) from toil.lib.aws.session import AWSConnectionManager +from toil.lib.aws.s3 import create_s3_bucket +from toil.lib.aws.utils import flatten_tags, boto3_pager from toil.lib.aws.session import client as get_client -from toil.lib.aws.utils import boto3_pager, create_s3_bucket, flatten_tags from toil.lib.conversions import human2bytes from toil.lib.ec2 import ( a_short_time, @@ -336,14 +336,8 @@ def _write_file_to_cloud(self, key: str, contents: bytes) -> str: bucket = s3.Bucket(bucket_name) except ClientError as err: if get_error_status(err) == 404: - bucket = create_s3_bucket(s3, bucket_name=bucket_name, region=self._region) - bucket.wait_until_exists() + bucket = create_s3_bucket(s3, bucket_name, self._region) bucket.Versioning().enable() - - owner_tag = os.environ.get('TOIL_OWNER_TAG') - if owner_tag: - bucket_tagging = s3.BucketTagging(bucket_name) - bucket_tagging.put(Tagging={'TagSet': [{'Key': 'Owner', 'Value': owner_tag}]}) else: raise diff --git a/src/toil/test/jobStores/jobStoreTest.py b/src/toil/test/jobStores/jobStoreTest.py index 33453f92d7..7ea0520fa4 100644 --- a/src/toil/test/jobStores/jobStoreTest.py +++ b/src/toil/test/jobStores/jobStoreTest.py @@ -22,6 +22,8 @@ import time import urllib.parse as urlparse import uuid +import pytest + from abc import ABCMeta, abstractmethod from io import BytesIO from itertools import chain, islice @@ -30,8 +32,6 @@ from threading import Thread from typing import Any, Tuple from urllib.request import Request, urlopen - -import pytest from stubserver import FTPStubServer from toil.common import Config, Toil @@ -40,6 +40,8 @@ from toil.jobStores.abstractJobStore import (NoSuchFileException, NoSuchJobException) from toil.jobStores.fileJobStore import FileJobStore +from toil.lib.aws.s3 import create_s3_bucket, delete_s3_bucket +from toil.lib.aws.utils import get_object_for_url from toil.lib.io import mkdtemp from toil.lib.memoize import memoize from toil.lib.retry import retry @@ -1328,31 +1330,20 @@ def testSDBDomainsDeletedOnFailedJobstoreBucketCreation(self): from toil.lib.aws.utils import retry_s3 externalAWSLocation = 'us-west-1' - for testRegion in 'us-east-1', 'us-west-2': + for region in 'us-east-1', 'us-west-2': # We run this test twice, once with the default s3 server us-east-1 as the test region # and once with another server (us-west-2). The external server is always us-west-1. # This incidentally tests that the BucketLocationConflictException is thrown when using # both the default, and a non-default server. 
-            testJobStoreUUID = str(uuid.uuid4())
+            test_uuid = str(uuid.uuid4())
             # Create the bucket at the external region
-            bucketName = 'domain-test-' + testJobStoreUUID + '--files'
+            bucket_name = f'domain-test-{test_uuid}--files'
             client = establish_boto3_session().client('s3', region_name=externalAWSLocation)
             resource = establish_boto3_session().resource('s3', region_name=externalAWSLocation)
 
-            for attempt in retry_s3(delays=(2, 5, 10, 30, 60), timeout=600):
-                with attempt:
-                    # Create the bucket at the home region
-                    client.create_bucket(Bucket=bucketName,
-                                         CreateBucketConfiguration={'LocationConstraint': externalAWSLocation})
-
-            owner_tag = os.environ.get('TOIL_OWNER_TAG')
-            if owner_tag:
-                for attempt in retry_s3(delays=(1, 1, 2, 4, 8, 16), timeout=33):
-                    with attempt:
-                        bucket_tagging = resource.BucketTagging(bucketName)
-                        bucket_tagging.put(Tagging={'TagSet': [{'Key': 'Owner', 'Value': owner_tag}]})
-
-            options = Job.Runner.getDefaultOptions('aws:' + testRegion + ':domain-test-' + testJobStoreUUID)
+            create_s3_bucket(resource, bucket_name, externalAWSLocation)
+
+            options = Job.Runner.getDefaultOptions(f'aws:{region}:domain-test-{test_uuid}')
             options.logLevel = 'DEBUG'
             try:
                 with Toil(options) as toil:
@@ -1372,35 +1363,12 @@ def testSDBDomainsDeletedOnFailedJobstoreBucketCreation(self):
                         next_token = domains.get("NextToken")
                         if next_token is None:
                             break
-                self.assertFalse([d for d in allDomainNames if testJobStoreUUID in d])
+                self.assertFalse([d for d in allDomainNames if test_uuid in d])
             else:
                 self.fail()
             finally:
-                try:
-                    for attempt in retry_s3():
-                        with attempt:
-                            client.delete_bucket(Bucket=bucketName)
-                except ClientError as e:
-                    # The actual HTTP code of the error is in status.
-                    if e.response.get('ResponseMetadata', {}).get('HTTPStatusCode') == 404:
-                        # The bucket doesn't exist; maybe a failed delete actually succeeded.
-                        pass
-                    else:
-                        raise
+                delete_s3_bucket(s3_resource=resource, bucket_name=bucket_name)
 
-    @slow
-    def testInlinedFiles(self):
-        from toil.jobStores.aws.jobStore import AWSJobStore
-        jobstore = self.jobstore_initialized
-        for encrypted in (True, False):
-            n = AWSJobStore.FileInfo.maxInlinedSize()
-            sizes = (1, n // 2, n - 1, n, n + 1, 2 * n)
-            for size in chain(sizes, islice(reversed(sizes), 1)):
-                s = os.urandom(size)
-                with jobstore.write_shared_file_stream('foo') as f:
-                    f.write(s)
-                with jobstore.read_shared_file_stream('foo') as f:
-                    self.assertEqual(s, f.read())
 
     def testOverlargeJob(self):
         jobstore = self.jobstore_initialized
@@ -1483,23 +1451,17 @@ def _hashTestFile(self, url: str) -> str:
 
     def _createExternalStore(self):
         """A S3.Bucket instance is returned"""
         from toil.jobStores.aws.jobStore import establish_boto3_session
-        from toil.lib.aws.utils import retry_s3, create_s3_bucket
 
-        resource = establish_boto3_session().resource(
-            "s3", region_name=self.awsRegion()
-        )
-        bucket_name = f"import-export-test-{uuid.uuid4()}"
-        location = self.awsRegion()
+        resource = establish_boto3_session().resource("s3", region_name=self.awsRegion())
 
-        for attempt in retry_s3():
-            with attempt:
-                bucket = create_s3_bucket(resource, bucket_name, location)
-                bucket.wait_until_exists()
-                return bucket
+        return create_s3_bucket(
+            s3_resource=resource,
+            bucket_name=f"import-export-test-{uuid.uuid4()}",
+            region=self.awsRegion()
+        )
 
     def _cleanUpExternalStore(self, bucket):
         from toil.jobStores.aws.jobStore import establish_boto3_session
-        from toil.lib.aws.utils import delete_s3_bucket
 
         resource = establish_boto3_session().resource(
             "s3", region_name=self.awsRegion()
diff --git a/src/toil/test/lib/aws/test_iam.py b/src/toil/test/lib/aws/test_iam.py
index c1b5310468..d10ca6e410 100644
--- a/src/toil/test/lib/aws/test_iam.py
+++ b/src/toil/test/lib/aws/test_iam.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 import json
 import logging
-
 import boto3
+
 from moto import mock_aws
 
 from toil.lib.aws import iam
@@ -26,7 +26,6 @@ class IAMTest(ToilTest):
     """Check that given permissions and associated functions perform correctly"""
 
-
     def test_permissions_iam(self):
         granted_perms = {'*': {'Action': ['ec2:*', 'iam:*', 's3:*', 'sdb:*'], 'NotAction': []}}
         assert iam.policy_permissions_allow(granted_perms, iam.CLUSTER_LAUNCHING_PERMISSIONS) is True
diff --git a/src/toil/test/lib/aws/test_s3.py b/src/toil/test/lib/aws/test_s3.py
index 7ba5c9e77b..3ab3d1b170 100644
--- a/src/toil/test/lib/aws/test_s3.py
+++ b/src/toil/test/lib/aws/test_s3.py
@@ -18,7 +18,8 @@
 
 from toil.jobStores.aws.jobStore import AWSJobStore
 from toil.lib.aws.session import establish_boto3_session
-from toil.lib.aws.utils import create_s3_bucket, get_bucket_region
+from toil.lib.aws.s3 import create_s3_bucket, delete_s3_bucket
+from toil.lib.aws.utils import get_bucket_region
 from toil.test import ToilTest, needs_aws_s3
 
 logger = logging.getLogger(__name__)
@@ -46,15 +47,7 @@ def setUpClass(cls) -> None:
     def test_create_bucket(self) -> None:
         """Test bucket creation for us-east-1."""
         bucket_name = f"toil-s3test-{uuid.uuid4()}"
-        assert self.s3_resource
         S3Test.bucket = create_s3_bucket(self.s3_resource, bucket_name, "us-east-1")
-        S3Test.bucket.wait_until_exists()
-        owner_tag = os.environ.get("TOIL_OWNER_TAG")
-        if owner_tag:
-            bucket_tagging = self.s3_resource.BucketTagging(bucket_name)
-            bucket_tagging.put(
-                Tagging={"TagSet": [{"Key": "Owner", "Value": owner_tag}]}
-            )
         self.assertEqual(get_bucket_region(bucket_name), "us-east-1")
 
         # Make sure all the bucket location getting strategies work on a bucket we created
@@ -74,5 +67,5 @@ def test_get_bucket_location_public_bucket(self) -> None:
     @classmethod
     def tearDownClass(cls) -> None:
         if cls.bucket:
-            AWSJobStore._delete_bucket(cls.bucket)
+            delete_s3_bucket(cls.s3_resource, cls.bucket.name)
         super().tearDownClass()
diff --git a/src/toil/test/lib/aws/test_utils.py b/src/toil/test/lib/aws/test_utils.py
index d70dce17e0..31ecaba54c 100644
--- a/src/toil/test/lib/aws/test_utils.py
+++ b/src/toil/test/lib/aws/test_utils.py
@@ -1,64 +1,50 @@
 # Copyright (C) 2015-2021 Regents of the University of California
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
+# Licensed under the Apache License, Version 2.0 (the 'License');
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
+# distributed under the License is distributed on an 'AS IS' BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import logging
+import logging
+import os
 
 import pytest
 
-from toil.lib.aws import build_tag_dict_from_env
+from unittest import mock
+
+from toil.lib.aws import tags_from_env
 from toil.test import ToilTest
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG)
 
 
 class TagGenerationTest(ToilTest):
-    """
-    Test for tag generation from environment variables
-    """
+    """Test for tag generation from environment variables."""
+
+    @mock.patch.dict(os.environ, {'TOIL_OWNER_TAG': '😀',
+                                  'TOIL_AWS_TAGS': '{}'}, clear=True)
     def test_build_tag(self):
-        environment = dict()
-        environment["TOIL_OWNER_TAG"] = "😀"
-        environment["TOIL_AWS_TAGS"] = None
-        tag_dict = build_tag_dict_from_env(environment)
-        assert(tag_dict == {'Owner': '😀'})
+        tag_dict = tags_from_env()
+        assert tag_dict == {'Owner': '😀'}
 
+    @mock.patch.dict(os.environ, {'TOIL_AWS_TAGS': '{}'}, clear=True)
     def test_empty_aws_tags(self):
-        environment = dict()
-        environment["TOIL_OWNER_TAG"] = None
-        environment["TOIL_AWS_TAGS"] = "{}"
-        tag_dict = build_tag_dict_from_env(environment)
-        assert (tag_dict == dict())
-
-    def test_incorrect_json_object(self):
-        with pytest.raises(SystemExit):
-            environment = dict()
-            environment["TOIL_OWNER_TAG"] = None
-            environment["TOIL_AWS_TAGS"] = "231"
-            tag_dict = build_tag_dict_from_env(environment)
+        tag_dict = tags_from_env()
+        assert tag_dict == dict()
 
+    # JSON that parses but is not an object (e.g. a bare number) must also be
+    # rejected, so tags_from_env() keeps its Dict[str, str] return type.
+    @mock.patch.dict(os.environ, {'TOIL_AWS_TAGS': '231'}, clear=True)
+    def test_incorrect_json_object(self):
+        with pytest.raises(SystemExit):
+            tags_from_env()
+
+    @mock.patch.dict(os.environ, {'TOIL_AWS_TAGS': '😀'}, clear=True)
     def test_incorrect_json_emoji(self):
         with pytest.raises(SystemExit):
-            environment = dict()
-            environment["TOIL_OWNER_TAG"] = None
-            environment["TOIL_AWS_TAGS"] = "😀"
-            tag_dict = build_tag_dict_from_env(environment)
+            tags_from_env()
 
+    @mock.patch.dict(os.environ, {'TOIL_OWNER_TAG': '😀',
+                                  'TOIL_AWS_TAGS': '{"1": "2", " ":")"}'}, clear=True)
     def test_build_tag_with_tags(self):
-        environment = dict()
-        environment["TOIL_OWNER_TAG"] = "😀"
-        environment["TOIL_AWS_TAGS"] = '{"1": "2", " ":")"}'
-        tag_dict = build_tag_dict_from_env(environment)
-        assert(tag_dict == {'Owner': '😀', '1': '2', ' ': ')'})
-
-
-
+        tag_dict = tags_from_env()
+        assert tag_dict == {'Owner': '😀', '1': '2', ' ': ')'}
diff --git a/src/toil/test/server/serverTest.py b/src/toil/test/server/serverTest.py
index b3688552d4..22796f812c 100644
--- a/src/toil/test/server/serverTest.py
+++ b/src/toil/test/server/serverTest.py
@@ -203,22 +203,22 @@ def setUpClass(cls) -> None:
         super().setUpClass()
 
         from toil.lib.aws import get_current_aws_region, session
-        from toil.lib.aws.utils import create_s3_bucket
+        from toil.lib.aws.s3 import create_s3_bucket
 
         cls.region = get_current_aws_region()
         cls.s3_resource = session.resource("s3", region_name=cls.region)
 
         cls.bucket_name = f"toil-test-{uuid.uuid4()}"
 
         cls.bucket = create_s3_bucket(cls.s3_resource, cls.bucket_name, cls.region)
-        cls.bucket.wait_until_exists()
 
     @classmethod
     def tearDownClass(cls) -> None:
-        from toil.lib.aws.utils import delete_s3_bucket
+        from toil.lib.aws.s3 import delete_s3_bucket
 
         if cls.bucket_name:
-            delete_s3_bucket(cls.s3_resource, cls.bucket_name, cls.region)
+            delete_s3_bucket(cls.s3_resource, cls.bucket_name)
 
         super().tearDownClass()
 
+
 class AWSStateStoreTest(hidden.AbstractStateStoreTest, BucketUsingTest):
     """Test AWS-based state storage."""
diff --git a/src/toil/utils/toilLaunchCluster.py b/src/toil/utils/toilLaunchCluster.py
index 328f76401d..80d9cd6fbf 100644
--- a/src/toil/utils/toilLaunchCluster.py
+++ b/src/toil/utils/toilLaunchCluster.py
@@ -18,11 +18,9 @@
 from typing import Dict, List, Tuple, Union
 
 from toil import applianceSelf
+
+from toil.lib.aws import tags_from_env
 from toil.common import parser_with_common_options
-try:
-    from toil.lib.aws import build_tag_dict_from_env
-except ModuleNotFoundError:
-    build_tag_dict_from_env: Dict[str, str] = lambda _: {}  # type: ignore[no-redef]
 from toil.lib.conversions import opt_strtobool
 from toil.provisioners import (check_valid_node_types,
                                cluster_factory,
@@ -130,7 +128,7 @@ def main() -> None:
     options = parser.parse_args()
     set_logging_from_options(options)
 
-    tags = create_tags_dict(options.tags) if options.tags else build_tag_dict_from_env()
+    tags = create_tags_dict(options.tags) if options.tags else tags_from_env()
 
     # Get worker node types
     worker_node_types = parse_node_types(options.nodeTypes)
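
A note on the TOIL_AWS_TAGS / TOIL_OWNER_TAG interaction above (an illustrative sketch, not part of the patch itself): tags_from_env() reads TOIL_AWS_TAGS as a JSON object and then lets TOIL_OWNER_TAG override the 'Owner' key, so:

    import os
    from toil.lib.aws import tags_from_env

    # Placeholder values; any JSON object works for TOIL_AWS_TAGS.
    os.environ['TOIL_AWS_TAGS'] = '{"team": "toil", "Owner": "nobody"}'
    os.environ['TOIL_OWNER_TAG'] = 'alice'
    print(tags_from_env())  # {'team': 'toil', 'Owner': 'alice'}

toil launch-cluster falls back to these environment-derived tags only when no tags are passed on its command line (last hunk above).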