diff --git a/.changes/next-release/enhancement-s3-49696.json b/.changes/next-release/enhancement-s3-49696.json new file mode 100644 index 000000000000..4bf1f1917dd4 --- /dev/null +++ b/.changes/next-release/enhancement-s3-49696.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "``s3``", + "description": "Adds new parameter ``--case-conflict`` that configures how case conflicts are handled on case-insensitive filesystems" +} diff --git a/awscli/customizations/s3/filegenerator.py b/awscli/customizations/s3/filegenerator.py index af2e8318adb9..4571973d410c 100644 --- a/awscli/customizations/s3/filegenerator.py +++ b/awscli/customizations/s3/filegenerator.py @@ -94,7 +94,8 @@ def __init__(self, directory, filename): class FileStat(object): def __init__(self, src, dest=None, compare_key=None, size=None, last_update=None, src_type=None, dest_type=None, - operation_name=None, response_data=None, etag=None): + operation_name=None, response_data=None, etag=None, + case_conflict_submitted=None, case_conflict_key=None,): self.src = src self.dest = dest self.compare_key = compare_key @@ -105,6 +106,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None, self.operation_name = operation_name self.response_data = response_data self.etag = etag + self.case_conflict_submitted = case_conflict_submitted + self.case_conflict_key = case_conflict_key class FileGenerator(object): diff --git a/awscli/customizations/s3/fileinfo.py b/awscli/customizations/s3/fileinfo.py index 463044df9636..c77cf5803ebb 100644 --- a/awscli/customizations/s3/fileinfo.py +++ b/awscli/customizations/s3/fileinfo.py @@ -42,7 +42,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None, last_update=None, src_type=None, dest_type=None, operation_name=None, client=None, parameters=None, source_client=None, is_stream=False, - associated_response_data=None, etag=None): + associated_response_data=None, etag=None, + case_conflict_submitted=None, case_conflict_key=None,): self.src = src self.src_type = src_type self.operation_name = operation_name @@ -60,6 +61,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None, self.is_stream = is_stream self.associated_response_data = associated_response_data self.etag = etag + self.case_conflict_submitted = case_conflict_submitted + self.case_conflict_key = case_conflict_key def is_glacier_compatible(self): """Determines if a file info object is glacier compatible diff --git a/awscli/customizations/s3/fileinfobuilder.py b/awscli/customizations/s3/fileinfobuilder.py index d4a2d12c385e..9a34b0a5ac20 100644 --- a/awscli/customizations/s3/fileinfobuilder.py +++ b/awscli/customizations/s3/fileinfobuilder.py @@ -46,6 +46,12 @@ def _inject_info(self, file_base): file_info_attr['is_stream'] = self._is_stream file_info_attr['associated_response_data'] = file_base.response_data file_info_attr['etag'] = file_base.etag + file_info_attr['case_conflict_submitted'] = getattr( + file_base, 'case_conflict_submitted', None + ) + file_info_attr['case_conflict_key'] = getattr( + file_base, 'case_conflict_key', None + ) # This is a bit quirky. The below conditional hinges on the --delete # flag being set, which only occurs during a sync command. The source diff --git a/awscli/customizations/s3/s3handler.py b/awscli/customizations/s3/s3handler.py index 5c875061c441..84c42cd729f8 100644 --- a/awscli/customizations/s3/s3handler.py +++ b/awscli/customizations/s3/s3handler.py @@ -47,6 +47,7 @@ from awscli.customizations.s3.utils import DeleteSourceFileSubscriber from awscli.customizations.s3.utils import DeleteSourceObjectSubscriber from awscli.customizations.s3.utils import DeleteCopySourceObjectSubscriber +from awscli.customizations.s3.utils import CaseConflictCleanupSubscriber from awscli.compat import get_binary_stdin @@ -403,6 +404,13 @@ def _add_additional_subscribers(self, subscribers, fileinfo): if self._cli_params.get('is_move', False): subscribers.append(DeleteSourceObjectSubscriber( fileinfo.source_client)) + if fileinfo.case_conflict_submitted is not None: + subscribers.append( + CaseConflictCleanupSubscriber( + fileinfo.case_conflict_submitted, + fileinfo.case_conflict_key, + ) + ) def _submit_transfer_request(self, fileinfo, extra_args, subscribers): bucket, key = find_bucket_key(fileinfo.src) diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py index 47ef10e88eec..f77d7196e488 100644 --- a/awscli/customizations/s3/subcommands.py +++ b/awscli/customizations/s3/subcommands.py @@ -34,7 +34,8 @@ S3PathResolver from awscli.customizations.utils import uni_print from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \ - SizeAndLastModifiedSync, NeverSync + SizeAndLastModifiedSync, NeverSync, AlwaysSync +from awscli.customizations.s3.syncstrategy.caseconflict import CaseConflictSync from awscli.customizations.s3 import transferconfig from awscli.utils import resolve_v2_debug_mode @@ -482,6 +483,33 @@ ) } +CASE_CONFLICT = { + 'name': 'case-conflict', + 'choices': [ + 'ignore', + 'skip', + 'warn', + 'error', + ], + 'default': 'ignore', + 'help_text': ( + "Configures behavior when attempting to download multiple objects " + "whose keys differ only by case, which can cause undefined behavior " + "on case-insensitive filesystems. " + "This parameter only applies for commands that perform multiple S3 " + "to local downloads. " + f"See Handling case " + "conflicts for details. Valid values are: " + "" + ), + } + TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL, FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE, SSE, SSE_C, SSE_C_KEY, SSE_KMS_KEY_ID, SSE_C_COPY_SOURCE, @@ -807,7 +835,8 @@ class CpCommand(S3TransferCommand): "or " ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True, 'synopsis': USAGE}] + TRANSFER_ARGS + \ - [METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE] + [METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE, + CASE_CONFLICT] class MvCommand(S3TransferCommand): @@ -817,7 +846,8 @@ class MvCommand(S3TransferCommand): "or " ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True, 'synopsis': USAGE}] + TRANSFER_ARGS +\ - [METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS] + [METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS, + CASE_CONFLICT] class RmCommand(S3TransferCommand): @@ -839,7 +869,7 @@ class SyncCommand(S3TransferCommand): " or " ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True, 'synopsis': USAGE}] + TRANSFER_ARGS + \ - [METADATA, METADATA_DIRECTIVE] + [METADATA, METADATA_DIRECTIVE, CASE_CONFLICT] class MbCommand(S3Command): @@ -1004,7 +1034,16 @@ def choose_sync_strategies(self): # Set the default strategies. sync_strategies['file_at_src_and_dest_sync_strategy'] = \ SizeAndLastModifiedSync() - sync_strategies['file_not_at_dest_sync_strategy'] = MissingFileSync() + if self._should_handle_case_conflicts(): + sync_strategies['file_not_at_dest_sync_strategy'] = ( + CaseConflictSync( + on_case_conflict=self.parameters['case_conflict'] + ) + ) + else: + sync_strategies['file_not_at_dest_sync_strategy'] = ( + MissingFileSync() + ) sync_strategies['file_not_at_src_sync_strategy'] = NeverSync() # Determine what strategies to override if any. @@ -1138,6 +1177,12 @@ def run(self): 'filters': [create_filter(self.parameters)], 'file_info_builder': [file_info_builder], 's3_handler': [s3_transfer_handler]} + if self._should_handle_case_conflicts(): + self._handle_case_conflicts( + command_dict, + rev_files, + rev_generator, + ) elif self.cmd == 'rm': command_dict = {'setup': [files], 'file_generator': [file_generator], @@ -1150,6 +1195,12 @@ def run(self): 'filters': [create_filter(self.parameters)], 'file_info_builder': [file_info_builder], 's3_handler': [s3_transfer_handler]} + if self._should_handle_case_conflicts(): + self._handle_case_conflicts( + command_dict, + rev_files, + rev_generator, + ) files = command_dict['setup'] while self.instructions: @@ -1215,6 +1266,74 @@ def _map_sse_c_params(self, request_parameters, paths_type): } ) + def _should_handle_case_conflicts(self): + return ( + self.cmd in {'sync', 'cp', 'mv'} + and self.parameters.get('paths_type') == 's3local' + and self.parameters['case_conflict'] != 'ignore' + and self.parameters.get('dir_op') + ) + + def _handle_case_conflicts(self, command_dict, rev_files, rev_generator): + # Objects are not returned in lexicographical order when + # operated on S3 Express directory buckets. This is required + # for sync operations to behave correctly, which is what + # recursive copies and moves fall back to so potential case + # conflicts can be detected and handled. + if not is_s3express_bucket( + split_s3_bucket_key(self.parameters['src'])[0] + ): + self._modify_instructions_for_case_conflicts( + command_dict, rev_files, rev_generator + ) + return + # `skip` and `error` are not valid choices in this case because + # it's not possible to detect case conflicts. + if self.parameters['case_conflict'] not in {'ignore', 'warn'}: + raise ValueError( + f"`{self.parameters['case_conflict']}` is not a valid value " + "for `--case-conflict` when operating on S3 Express " + "directory buckets. Valid values: `warn`, `ignore`." + ) + msg = ( + "warning: Recursive copies/moves from an S3 Express " + "directory bucket to a case-insensitive local filesystem " + "may result in undefined behavior if there are " + "S3 object key names that differ only by case. To disable " + "this warning, set the `--case-conflict` parameter to `ignore`. " + f"For more information, see {CaseConflictSync.DOC_URI}." + ) + uni_print(msg, sys.stderr) + + def _modify_instructions_for_case_conflicts( + self, command_dict, rev_files, rev_generator + ): + # Command will perform recursive S3 to local downloads. + # Checking for potential case conflicts requires knowledge + # of local files. Instead of writing a separate validation + # mechanism for recursive downloads, we modify the instructions + # to mimic a sync command. + sync_strategies = { + # Local filename exists with exact case match. Always sync + # because it's a copy operation. + 'file_at_src_and_dest_sync_strategy': AlwaysSync(), + # Local filename either doesn't exist or differs only by case. + # Let `CaseConflictSync` determine which it is and handle it + # according to configured `--case-conflict` parameter. + 'file_not_at_dest_sync_strategy': CaseConflictSync( + on_case_conflict=self.parameters['case_conflict'] + ), + # Copy is one-way so never sync if not at source. + 'file_not_at_src_sync_strategy': NeverSync(), + } + command_dict['setup'].append(rev_files) + command_dict['file_generator'].append(rev_generator) + command_dict['filters'].append(create_filter(self.parameters)) + command_dict['comparator'] = [Comparator(**sync_strategies)] + self.instructions.insert( + self.instructions.index('file_info_builder'), 'comparator' + ) + class CommandParameters(object): """ diff --git a/awscli/customizations/s3/syncstrategy/base.py b/awscli/customizations/s3/syncstrategy/base.py index 3d6a3a663ae2..a3d653849d75 100644 --- a/awscli/customizations/s3/syncstrategy/base.py +++ b/awscli/customizations/s3/syncstrategy/base.py @@ -254,3 +254,12 @@ def determine_should_sync(self, src_file, dest_file): LOG.debug("syncing: %s -> %s, file does not exist at destination", src_file.src, src_file.dest) return True + + +class AlwaysSync(BaseSync): + def __init__(self, sync_type='file_at_src_and_dest'): + super(AlwaysSync, self).__init__(sync_type) + + def determine_should_sync(self, src_file, dest_file): + LOG.debug(f"syncing: {src_file.src} -> {src_file.dest}") + return True diff --git a/awscli/customizations/s3/syncstrategy/caseconflict.py b/awscli/customizations/s3/syncstrategy/caseconflict.py new file mode 100644 index 000000000000..8c8520a0c793 --- /dev/null +++ b/awscli/customizations/s3/syncstrategy/caseconflict.py @@ -0,0 +1,96 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +import logging +import os +import sys + +from awscli.customizations.s3.syncstrategy.base import BaseSync +from awscli.customizations.utils import uni_print + +LOG = logging.getLogger(__name__) + + +class CaseConflictException(Exception): + pass + + +class CaseConflictSync(BaseSync): + DOC_URI = ( + "https://docs.aws.amazon.com/cli/v1/topic/" + "s3-case-insensitivity.html" + ) + + def __init__( + self, + sync_type='file_not_at_dest', + on_case_conflict='ignore', + submitted=None, + ): + super().__init__(sync_type) + self._on_case_conflict = on_case_conflict + if submitted is None: + submitted = set() + self._submitted = submitted + + @property + def submitted(self): + return self._submitted + + def determine_should_sync(self, src_file, dest_file): + # `src_file.compare_key` and `dest_file.compare_key` are not equal. + # This could mean that they're completely different or differ + # only by case. eg, `/tmp/a` and `/tmp/b` versus `/tmp/a` and `/tmp/A`. + # If the source file's destination already exists, that means it + # differs only by case and the conflict needs to be handled. + should_sync = True + # Normalize compare key for case sensitivity. + lower_compare_key = src_file.compare_key.lower() + if lower_compare_key in self._submitted or os.path.exists( + src_file.dest + ): + handler = getattr(self, f"_handle_{self._on_case_conflict}") + should_sync = handler(src_file) + if should_sync: + LOG.debug(f"syncing: {src_file.src} -> {src_file.dest}") + self._submitted.add(lower_compare_key) + # Set properties so that a subscriber can be created + # that removes the key from the set after download finishes. + src_file.case_conflict_submitted = self._submitted + src_file.case_conflict_key = lower_compare_key + return should_sync + + @staticmethod + def _handle_ignore(src_file): + return True + + @staticmethod + def _handle_skip(src_file): + msg = ( + f"warning: Skipping {src_file.src} -> {src_file.dest} " + "because a file whose name differs only by case either exists " + "or is being downloaded.\n" + ) + uni_print(msg, sys.stderr) + return False + + @staticmethod + def _handle_warn(src_file): + msg = ( + f"warning: Downloading {src_file.src} -> {src_file.dest} " + "despite a file whose name differs only by case either existing " + "or being downloaded. This behavior is not defined on " + "case-insensitive filesystems and may result in overwriting " + "existing files or race conditions between concurrent downloads. " + f"For more information, see {CaseConflictSync.DOC_URI}.\n" + ) + uni_print(msg, sys.stderr) + return True + + @staticmethod + def _handle_error(src_file): + msg = ( + f"Failed to download {src_file.src} -> {src_file.dest} " + "because a file whose name differs only by case either exists " + "or is being downloaded." + ) + raise CaseConflictException(msg) diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py index 0c18c3a7e76e..2d22e58daf4e 100644 --- a/awscli/customizations/s3/utils.py +++ b/awscli/customizations/s3/utils.py @@ -690,6 +690,20 @@ def _on_failure(self, future, e): pass +class CaseConflictCleanupSubscriber(BaseSubscriber): + """ + A subscriber which removes object compare key from case conflict set + when download finishes. + """ + + def __init__(self, submitted, case_conflict_key): + self._submitted = submitted + self._key = case_conflict_key + + def on_done(self, future, **kwargs): + self._submitted.discard(self._key) + + class DeleteSourceSubscriber(OnDoneFilteredSubscriber): """A subscriber which deletes the source of the transfer.""" def _on_success(self, future): diff --git a/awscli/testutils.py b/awscli/testutils.py index b92dcd21b5dc..46d8d4143133 100644 --- a/awscli/testutils.py +++ b/awscli/testutils.py @@ -27,7 +27,9 @@ import logging import os import platform +import random import shutil +import string import sys import tempfile import time @@ -36,6 +38,7 @@ from pprint import pformat from subprocess import PIPE, Popen from unittest import mock +from pathlib import Path import botocore.loaders from botocore.awsrequest import AWSResponse @@ -49,6 +52,11 @@ INTEG_LOG = logging.getLogger('awscli.tests.integration') AWS_CMD = None +with tempfile.TemporaryDirectory() as tmpdir: + with open(Path(tmpdir) / 'aws-cli-tmp-file', 'w') as f: + pass + CASE_INSENSITIVE = (Path(tmpdir) / 'AWS-CLI-TMP-FILE').exists() + def skip_if_windows(reason): """Decorator to skip tests that should not be run on windows. @@ -69,6 +77,15 @@ def decorator(func): return decorator +def skip_if_case_sensitive(): + def decorator(func): + return unittest.skipIf( + not CASE_INSENSITIVE, + "This test requires a case-insensitive filesystem." + )(func) + return decorator + + def create_clidriver(): driver = awscli.clidriver.create_clidriver() session = driver.session diff --git a/awscli/topics/s3-case-insensitivity.rst b/awscli/topics/s3-case-insensitivity.rst new file mode 100644 index 000000000000..8437ac416263 --- /dev/null +++ b/awscli/topics/s3-case-insensitivity.rst @@ -0,0 +1,105 @@ +:title: AWS CLI S3 Case-Insensitivity +:description: Using 'aws s3' commands on case-insensitive filesystems +:category: S3 +:related command: s3 cp, s3 sync, s3 mv + + +This page explains how to detect and handle potential case conflicts when +downloading multiple objects from S3 to a local case-insensitive filesystem +using a single AWS CLI command. + +Case conflicts +============== +S3 object keys are case-sensitive meaning that a bucket can have a set of +key names that differ only by case, for example, ``a.txt`` and ``A.txt``. + +The AWS CLI offers high-level S3 commands that manage transfers of +multiple S3 objects using a single command: + +* ``aws s3 sync`` +* ``aws s3 cp --recursive`` +* ``aws s3 mv --recursive`` + +Case conflicts can occur on case-insensitive filesystems when an S3 bucket +has multiple objects whose keys differ only by case and a single AWS CLI +command is called to download multiple S3 objects **OR** a local file +already exists whose name differs only by case. + +For example, consider an S3 bucket with the following stored objects: + +* ``a.txt`` +* ``A.txt`` + +When the following AWS CLI command is called, the AWS CLI will submit +requests to download ``a.txt`` and ``A.txt``. Since only +one can exist on a case-insensitive filesystem, the last download to finish +will be the file that's locally available. + +.. code-block:: + + aws s3 sync s3://examplebucket ./mylocaldir + +Detecting and handling case conflicts +===================================== +To detect and handle case conflicts, you can specify the ``--case-conflict`` +parameter. The following values are valid options: + +* ``error`` - When a case conflict is detected, the command will immediately + fail and abort in-progress downloads. +* ``warn`` - When a case conflict is detected, the AWS CLI will + display a warning. +* ``skip`` - When a case conflict is detected, the command will skip + downloading the object and continue and display a warning. +* ``ignore`` - (Default) Case conflicts will not be detected or handled. + + +Continuing the prior example, the following describes what happens when +appending the ``--case-conflict`` parameter with possible values: + +``--case-conflict error`` + +1. Submit a download request for ``A.txt``. +2. Detect that ``a.txt`` conflicts with an object that's been submitted for download. +3. Throw an error. If ``A.txt`` finished downloading, it will be locally available. Otherwise, the download request for ``A.txt`` will be aborted. + +``--case-conflict warn`` + +1. Submit a download request for ``A.txt``. +2. Detect that ``a.txt`` conflicts with an object that's been submitted for download. +3. Display a warning. +4. Submit a download request for ``a.txt``, downloading ``A.txt`` and ``a.txt`` in parallel. + +``--case-conflict skip`` + +1. Submit a download request for ``A.txt``. +2. Detect that ``a.txt`` conflicts with an object that's been submitted for download. +3. Skip downloading ``a.txt`` and continue. + +``--case-conflict ignore`` + +1. Submit a download request for ``A.txt``. +2. Submit a download request for ``a.txt``, downloading ``A.txt`` and ``a.txt`` in parallel. + +If your local filesystem is case-sensitive, there's no need to detect and +handle case conflicts. We recommend setting ``--case-conflict ignore`` +in this case. + +S3 Express directory buckets +============================ +Detecting case conflicts is **NOT** supported when the source is an S3 Express +directory bucket. When operating on directory buckets, valid values for the +``--case-conflict`` parameter are: + +* ``warn`` +* ``ignore`` + +The following values are invalid when operating on directory buckets: + +* ``error`` +* ``skip`` + +For example, calling the following command will fail: + +.. code-block:: + + aws s3 cp s3://mydirbucket--usw2-az1--x-s3 ./mylocaldir --recursive --case-conflict error \ No newline at end of file diff --git a/awscli/topics/topic-tags.json b/awscli/topics/topic-tags.json index 9d3a684bb06b..765424a8a6f1 100644 --- a/awscli/topics/topic-tags.json +++ b/awscli/topics/topic-tags.json @@ -69,5 +69,21 @@ "title": [ "AWS CLI S3 FAQ" ] + }, + "s3-case-insensitivity": { + "category": [ + "S3" + ], + "description": [ + "Using 'aws s3' commands on case-insensitive filesystems" + ], + "related command": [ + "s3 cp", + "s3 sync", + "s3 mv" + ], + "title": [ + "AWS CLI S3 Case-Insensitivity" + ] } } \ No newline at end of file diff --git a/tests/functional/s3/test_cp_command.py b/tests/functional/s3/test_cp_command.py index 6b13278751b0..507d133a32e2 100644 --- a/tests/functional/s3/test_cp_command.py +++ b/tests/functional/s3/test_cp_command.py @@ -13,11 +13,12 @@ # language governing permissions and limitations under the License. import os -from awscli.testutils import BaseAWSCommandParamsTest +from awscli.testutils import BaseAWSCommandParamsTest, skip_if_windows from awscli.testutils import capture_input from awscli.testutils import mock from awscli.compat import BytesIO from tests.functional.s3 import BaseS3TransferCommandTest +from tests.functional.s3.test_sync_command import TestSyncCaseConflict from tests import requires_crt @@ -1313,3 +1314,55 @@ def test_accepts_mrap_arns_with_slash(self): self.put_object_request(mrap_arn, 'mykey') ] ) + + +class TestCpRecursiveCaseConflict(TestSyncCaseConflict): + prefix = 's3 cp --recursive ' + + def test_ignore_by_default(self): + self.files.create_file(self.lower_key, 'mycontent') + # Note there's no --case-conflict param. + cmd = f"{self.prefix} s3://bucket {self.files.rootdir}" + self.parsed_responses = [ + self.list_objects_response([self.upper_key]), + self.get_object_response(), + ] + # Expect success, so not error mode. + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + # No warnings in stderr, so not warn or skip mode. + assert not stderr + + +class TestS3ExpressCpRecursive(BaseCPCommandTest): + prefix = 's3 cp --recursive ' + + def test_s3_express_error_raises_exception(self): + cmd = ( + f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} " + "--case-conflict error" + ) + _, stderr, _ = self.run_cmd(cmd, expected_rc=255) + assert "`error` is not a valid value" in stderr + + def test_s3_express_skip_raises_exception(self): + cmd = ( + f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} " + "--case-conflict skip" + ) + _, stderr, _ = self.run_cmd(cmd, expected_rc=255) + assert "`skip` is not a valid value" in stderr + + @skip_if_windows("Can't rename to same file") + def test_s3_express_warn_emits_warning(self): + cmd = ( + f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} " + "--case-conflict warn" + ) + self.parsed_responses = [ + self.list_objects_response(['a.txt', 'A.txt']), + self.get_object_response(), + self.get_object_response(), + ] + + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + assert "warning: Recursive copies/moves" in stderr diff --git a/tests/functional/s3/test_mv_command.py b/tests/functional/s3/test_mv_command.py index 5ecd434d9aa8..dcddbad7ca5b 100644 --- a/tests/functional/s3/test_mv_command.py +++ b/tests/functional/s3/test_mv_command.py @@ -13,7 +13,9 @@ # language governing permissions and limitations under the License. from awscli.customizations.s3.utils import S3PathResolver from awscli.compat import BytesIO +from awscli.testutils import skip_if_case_sensitive from tests.functional.s3 import BaseS3TransferCommandTest +from tests.functional.s3.test_sync_command import TestSyncCaseConflict from tests import requires_crt @@ -446,3 +448,84 @@ def test_raises_warning_if_validation_not_set_source(self): "myaccesspoint/key " "s3://bucket/key") self.assert_raises_warning(cmdline) + + +class TestMvRecursiveCaseConflict(TestSyncCaseConflict): + prefix = 's3 mv --recursive ' + + @skip_if_case_sensitive() + def test_warn_with_existing_file(self): + self.files.create_file(self.lower_key, 'mycontent') + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict warn" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key]), + self.get_object_response(), + self.delete_object_response(), + ] + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + assert f"warning: Downloading bucket/{self.upper_key}" in stderr + + def test_warn_with_case_conflicts_in_s3(self): + # This test case becomes very flaky because mv + # performs a get and delete operation twice. + # Delete is called after the get finishes, but + # the order of responses become non-deterministic + # when downloading multiple objects. The order + # could be [get, get, delete, delete] or + # [get, delete, get, delete]. Rather than making + # complex changes to patch this behavior, we're + # delegating the assertions to the sync and cp + # test suites. + pass + + def test_skip_with_case_conflicts_in_s3(self): + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict skip" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key, self.lower_key]), + self.get_object_response(), + self.delete_object_response(), + ] + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + assert f"warning: Skipping bucket/{self.lower_key}" in stderr + + def test_ignore_with_existing_file(self): + self.files.create_file(self.lower_key, 'mycontent') + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict ignore" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key]), + self.get_object_response(), + self.delete_object_response(), + ] + self.run_cmd(cmd, expected_rc=0) + + def test_ignore_with_case_conflicts_in_s3(self): + pass + + +class TestS3ExpressMvRecursive(BaseS3TransferCommandTest): + prefix = 's3 mv --recursive ' + + def test_s3_express_error_raises_exception(self): + cmd = ( + f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} " + "--case-conflict error" + ) + _, stderr, _ = self.run_cmd(cmd, expected_rc=255) + assert "`error` is not a valid value" in stderr + + def test_s3_express_skip_raises_exception(self): + cmd = ( + f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} " + "--case-conflict skip" + ) + _, stderr, _ = self.run_cmd(cmd, expected_rc=255) + assert "`skip` is not a valid value" in stderr diff --git a/tests/functional/s3/test_sync_command.py b/tests/functional/s3/test_sync_command.py index bdcdbfcc29b9..f67cd07f13a4 100644 --- a/tests/functional/s3/test_sync_command.py +++ b/tests/functional/s3/test_sync_command.py @@ -12,7 +12,7 @@ # language governing permissions and limitations under the License. import os -from awscli.testutils import mock, cd +from awscli.testutils import cd, mock, skip_if_case_sensitive, skip_if_windows from awscli.compat import BytesIO from tests.functional.s3 import BaseS3TransferCommandTest @@ -430,3 +430,110 @@ def test_compatible_with_sync_with_local_directory_like_directory_bucket(self): # Just asserting that command validated and made an API call self.assertEqual(len(self.operations_called), 1) self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2') + + +class TestSyncCaseConflict(BaseS3TransferCommandTest): + prefix = 's3 sync ' + + def setUp(self): + super().setUp() + self.lower_key = 'a.txt' + self.upper_key = 'A.txt' + + @skip_if_case_sensitive() + def test_error_with_existing_file(self): + self.files.create_file(self.lower_key, 'mycontent') + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict error" + ) + self.parsed_responses = [self.list_objects_response([self.upper_key])] + _, stderr, _ = self.run_cmd(cmd, expected_rc=1) + assert f"Failed to download bucket/{self.upper_key}" in stderr + + def test_error_with_case_conflicts_in_s3(self): + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict error" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key, self.lower_key]) + ] + _, stderr, _ = self.run_cmd(cmd, expected_rc=1) + assert f"Failed to download bucket/{self.lower_key}" in stderr + + @skip_if_case_sensitive() + def test_warn_with_existing_file(self): + self.files.create_file(self.lower_key, 'mycontent') + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict warn" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key]), + self.get_object_response(), + ] + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + assert f"warning: Downloading bucket/{self.upper_key}" in stderr + + @skip_if_windows("Can't rename to same file") + def test_warn_with_case_conflicts_in_s3(self): + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict warn" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key, self.lower_key]), + self.get_object_response(), + self.get_object_response(), + ] + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + assert f"warning: Downloading bucket/{self.lower_key}" in stderr + + @skip_if_case_sensitive() + def test_skip_with_existing_file(self): + self.files.create_file(self.lower_key, 'mycontent') + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict skip" + ) + self.parsed_responses = [self.list_objects_response([self.upper_key])] + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + assert f"warning: Skipping bucket/{self.upper_key}" in stderr + + def test_skip_with_case_conflicts_in_s3(self): + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict skip" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key, self.lower_key]), + self.get_object_response(), + ] + _, stderr, _ = self.run_cmd(cmd, expected_rc=0) + assert f"warning: Skipping bucket/{self.lower_key}" in stderr + + def test_ignore_with_existing_file(self): + self.files.create_file(self.lower_key, 'mycontent') + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict ignore" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key]), + self.get_object_response(), + ] + self.run_cmd(cmd, expected_rc=0) + + @skip_if_windows("Can't rename to same file") + def test_ignore_with_case_conflicts_in_s3(self): + cmd = ( + f"{self.prefix} s3://bucket {self.files.rootdir} " + "--case-conflict ignore" + ) + self.parsed_responses = [ + self.list_objects_response([self.upper_key, self.lower_key]), + self.get_object_response(), + self.get_object_response(), + ] + self.run_cmd(cmd, expected_rc=0) diff --git a/tests/unit/customizations/s3/syncstrategy/test_base.py b/tests/unit/customizations/s3/syncstrategy/test_base.py index c6dba74b28a3..9e8634d8c13e 100644 --- a/tests/unit/customizations/s3/syncstrategy/test_base.py +++ b/tests/unit/customizations/s3/syncstrategy/test_base.py @@ -13,7 +13,7 @@ import datetime from awscli.customizations.s3.filegenerator import FileStat -from awscli.customizations.s3.syncstrategy.base import BaseSync, \ +from awscli.customizations.s3.syncstrategy.base import AlwaysSync, BaseSync, \ SizeAndLastModifiedSync, MissingFileSync, NeverSync from awscli.testutils import mock, unittest @@ -290,5 +290,32 @@ def test_determine_should_sync(self): self.assertTrue(should_sync) +class TestAlwaysSync: + def test_determine_should_sync(self): + always_sync = AlwaysSync() + time = datetime.datetime.now() + src_file = FileStat( + src='', + dest='', + compare_key='test.py', + size=10, + last_update=time, + src_type='s3', + dest_type='local', + operation_name='download', + ) + dest_file = FileStat( + src='', + dest='', + compare_key='test.py', + size=10, + last_update=time, + src_type='local', + dest_type='s3', + operation_name='', + ) + assert always_sync.determine_should_sync(src_file, dest_file) is True + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/customizations/s3/syncstrategy/test_caseconflict.py b/tests/unit/customizations/s3/syncstrategy/test_caseconflict.py new file mode 100644 index 000000000000..536e40f2a9be --- /dev/null +++ b/tests/unit/customizations/s3/syncstrategy/test_caseconflict.py @@ -0,0 +1,176 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 +import pytest + +from awscli.customizations.s3.filegenerator import FileStat +from awscli.customizations.s3.syncstrategy.caseconflict import ( + CaseConflictException, + CaseConflictSync, +) +from awscli.testutils import skip_if_case_sensitive + + +@pytest.fixture +def lower_compare_key(): + return 'a.txt' + + +@pytest.fixture +def lower_temp_filepath(lower_compare_key, tmp_path): + p = tmp_path / lower_compare_key + return p.resolve() + + +@pytest.fixture +def lower_file_stat(lower_compare_key, lower_temp_filepath): + return FileStat( + src=f'bucket/{lower_compare_key}', + dest=lower_temp_filepath, + compare_key=lower_compare_key, + ) + + +@pytest.fixture +def upper_compare_key(): + return 'A.txt' + + +@pytest.fixture +def upper_temp_filepath(upper_compare_key, tmp_path): + p = tmp_path / upper_compare_key + p.write_text('foobar') + return p.resolve() + + +@pytest.fixture +def upper_file_stat(upper_compare_key, upper_temp_filepath): + return FileStat( + src=f'bucket/{upper_compare_key}', + dest=upper_temp_filepath, + compare_key=upper_compare_key, + ) + + +@pytest.fixture +def upper_key(upper_file_stat): + return upper_file_stat.compare_key.lower() + + +@pytest.fixture +def no_conflict_file_stat(tmp_path): + return FileStat( + src='bucket/foo.txt', + # Note that this file was never written to. + dest=f'{tmp_path}/foo.txt', + compare_key='foo.txt', + ) + + +class TestCaseConflictSync: + def test_error_with_no_conflict_syncs( + self, no_conflict_file_stat, lower_file_stat, capsys + ): + case_conflict = CaseConflictSync(on_case_conflict='error') + should_sync = case_conflict.determine_should_sync( + no_conflict_file_stat, lower_file_stat + ) + captured = capsys.readouterr() + assert should_sync is True + assert not captured.err + + @skip_if_case_sensitive() + def test_error_with_existing_file(self, lower_file_stat, upper_file_stat): + case_conflict_sync = CaseConflictSync(on_case_conflict='error') + with pytest.raises(CaseConflictException) as exc: + case_conflict_sync.determine_should_sync( + lower_file_stat, + upper_file_stat, + ) + assert 'Failed to download bucket/a.txt' in str(exc.value) + + def test_error_with_case_conflicts_in_s3( + self, lower_file_stat, upper_file_stat, upper_key + ): + case_conflict_sync = CaseConflictSync( + on_case_conflict='error', submitted={upper_key} + ) + with pytest.raises(CaseConflictException) as exc: + case_conflict_sync.determine_should_sync( + lower_file_stat, + upper_file_stat, + ) + assert 'Failed to download bucket/a.txt' in str(exc.value) + + def test_warn_with_no_conflict_syncs( + self, no_conflict_file_stat, lower_file_stat, capsys + ): + case_conflict = CaseConflictSync(on_case_conflict='warn') + should_sync = case_conflict.determine_should_sync( + no_conflict_file_stat, lower_file_stat + ) + captured = capsys.readouterr() + assert should_sync is True + assert not captured.err + + @skip_if_case_sensitive() + def test_warn_with_existing_file( + self, lower_file_stat, upper_file_stat, capsys + ): + case_conflict_sync = CaseConflictSync(on_case_conflict='warn') + should_sync = case_conflict_sync.determine_should_sync( + lower_file_stat, upper_file_stat + ) + captured = capsys.readouterr() + assert should_sync is True + assert 'warning: ' in captured.err + + def test_warn_with_case_conflicts_in_s3( + self, lower_file_stat, upper_file_stat, upper_key, capsys + ): + case_conflict_sync = CaseConflictSync( + on_case_conflict='warn', submitted={upper_key} + ) + should_sync = case_conflict_sync.determine_should_sync( + lower_file_stat, + upper_file_stat, + ) + captured = capsys.readouterr() + assert should_sync is True + assert 'warning: ' in captured.err + + def test_skip_with_no_conflict_syncs( + self, no_conflict_file_stat, lower_file_stat, capsys + ): + case_conflict = CaseConflictSync(on_case_conflict='skip') + should_sync = case_conflict.determine_should_sync( + no_conflict_file_stat, lower_file_stat + ) + captured = capsys.readouterr() + assert should_sync is True + assert not captured.err + + @skip_if_case_sensitive() + def test_skip_with_existing_file( + self, lower_file_stat, upper_file_stat, capsys + ): + case_conflict_sync = CaseConflictSync(on_case_conflict='skip') + should_sync = case_conflict_sync.determine_should_sync( + lower_file_stat, upper_file_stat + ) + captured = capsys.readouterr() + assert should_sync is False + assert 'warning: Skipping' in captured.err + + def test_skip_with_case_conflicts_in_s3( + self, lower_file_stat, upper_file_stat, upper_key, capsys + ): + case_conflict_sync = CaseConflictSync( + on_case_conflict='skip', submitted={upper_key} + ) + should_sync = case_conflict_sync.determine_should_sync( + lower_file_stat, + upper_file_stat, + ) + captured = capsys.readouterr() + assert should_sync is False + assert 'warning: Skipping' in captured.err diff --git a/tests/unit/customizations/s3/test_fileinfobuilder.py b/tests/unit/customizations/s3/test_fileinfobuilder.py index fdca6babdc11..e1284288a948 100644 --- a/tests/unit/customizations/s3/test_fileinfobuilder.py +++ b/tests/unit/customizations/s3/test_fileinfobuilder.py @@ -28,7 +28,9 @@ def test_info_setter(self): src_type='src_type', dest_type='dest_type', operation_name='operation_name', response_data='associated_response_data', - etag='etag',)] + etag='etag', + case_conflict_submitted='case_conflict_submitted', + case_conflict_key='case_conflict_key',)] file_infos = info_setter.call(files) for file_info in file_infos: attributes = file_info.__dict__.keys() diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py index 25fc257fcb21..82d85bf6a25f 100644 --- a/tests/unit/customizations/s3/test_subcommands.py +++ b/tests/unit/customizations/s3/test_subcommands.py @@ -503,7 +503,8 @@ def test_run_cp_get(self): 'paths_type': 's3local', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, 'follow_symlinks': True, 'page_size': None, - 'is_stream': False, 'source_region': None, 'v2_debug': False} + 'is_stream': False, 'source_region': None, 'v2_debug': False, + 'case_conflict': 'ignore'} self.parsed_responses = [{"ETag": "abcd", "ContentLength": 100, "LastModified": "2014-01-09T20:45:49.000Z"}] config = RuntimeConfig().build_config() diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py index 3754ffd662b8..96d4e216c44c 100644 --- a/tests/unit/customizations/s3/test_utils.py +++ b/tests/unit/customizations/s3/test_utils.py @@ -40,7 +40,7 @@ ProvideLastModifiedTimeSubscriber, DirectoryCreatorSubscriber, DeleteSourceObjectSubscriber, DeleteSourceFileSubscriber, DeleteCopySourceObjectSubscriber, NonSeekableStream, CreateDirectoryError, - S3PathResolver) + S3PathResolver, CaseConflictCleanupSubscriber) from awscli.customizations.s3.results import WarningResult from tests.unit.customizations.s3 import FakeTransferFuture from tests.unit.customizations.s3 import FakeTransferFutureMeta @@ -1207,3 +1207,17 @@ def test_alias_suffixes_dont_match_accesspoint_arns( def test_has_underlying_s3_path(self, path, expected_has_underlying_s3_path): has_underlying_s3_path = S3PathResolver.has_underlying_s3_path(path) assert has_underlying_s3_path == expected_has_underlying_s3_path + + +class TestCaseConflictCleanupSubscriber: + def test_on_done_removes_key_from_set(self): + submitted = {123, 456} + subscriber = CaseConflictCleanupSubscriber(submitted, 123) + subscriber.on_done(future=None) + assert submitted == {456} + + def test_on_done_handles_missing_key(self): + submitted = {456} + subscriber = CaseConflictCleanupSubscriber(submitted, 123) + subscriber.on_done(future=None) + assert submitted == {456}