diff --git a/.changes/next-release/enhancement-s3-49696.json b/.changes/next-release/enhancement-s3-49696.json
new file mode 100644
index 000000000000..4bf1f1917dd4
--- /dev/null
+++ b/.changes/next-release/enhancement-s3-49696.json
@@ -0,0 +1,5 @@
+{
+ "type": "enhancement",
+ "category": "``s3``",
+ "description": "Adds new parameter ``--case-conflict`` that configures how case conflicts are handled on case-insensitive filesystems"
+}
diff --git a/awscli/customizations/s3/filegenerator.py b/awscli/customizations/s3/filegenerator.py
index af2e8318adb9..4571973d410c 100644
--- a/awscli/customizations/s3/filegenerator.py
+++ b/awscli/customizations/s3/filegenerator.py
@@ -94,7 +94,8 @@ def __init__(self, directory, filename):
class FileStat(object):
def __init__(self, src, dest=None, compare_key=None, size=None,
last_update=None, src_type=None, dest_type=None,
- operation_name=None, response_data=None, etag=None):
+ operation_name=None, response_data=None, etag=None,
+ case_conflict_submitted=None, case_conflict_key=None,):
self.src = src
self.dest = dest
self.compare_key = compare_key
@@ -105,6 +106,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
self.operation_name = operation_name
self.response_data = response_data
self.etag = etag
+ self.case_conflict_submitted = case_conflict_submitted
+ self.case_conflict_key = case_conflict_key
class FileGenerator(object):
diff --git a/awscli/customizations/s3/fileinfo.py b/awscli/customizations/s3/fileinfo.py
index 463044df9636..c77cf5803ebb 100644
--- a/awscli/customizations/s3/fileinfo.py
+++ b/awscli/customizations/s3/fileinfo.py
@@ -42,7 +42,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
last_update=None, src_type=None, dest_type=None,
operation_name=None, client=None, parameters=None,
source_client=None, is_stream=False,
- associated_response_data=None, etag=None):
+ associated_response_data=None, etag=None,
+ case_conflict_submitted=None, case_conflict_key=None,):
self.src = src
self.src_type = src_type
self.operation_name = operation_name
@@ -60,6 +61,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
self.is_stream = is_stream
self.associated_response_data = associated_response_data
self.etag = etag
+ self.case_conflict_submitted = case_conflict_submitted
+ self.case_conflict_key = case_conflict_key
def is_glacier_compatible(self):
"""Determines if a file info object is glacier compatible
diff --git a/awscli/customizations/s3/fileinfobuilder.py b/awscli/customizations/s3/fileinfobuilder.py
index d4a2d12c385e..9a34b0a5ac20 100644
--- a/awscli/customizations/s3/fileinfobuilder.py
+++ b/awscli/customizations/s3/fileinfobuilder.py
@@ -46,6 +46,12 @@ def _inject_info(self, file_base):
file_info_attr['is_stream'] = self._is_stream
file_info_attr['associated_response_data'] = file_base.response_data
file_info_attr['etag'] = file_base.etag
+ file_info_attr['case_conflict_submitted'] = getattr(
+ file_base, 'case_conflict_submitted', None
+ )
+ file_info_attr['case_conflict_key'] = getattr(
+ file_base, 'case_conflict_key', None
+ )
# This is a bit quirky. The below conditional hinges on the --delete
# flag being set, which only occurs during a sync command. The source
diff --git a/awscli/customizations/s3/s3handler.py b/awscli/customizations/s3/s3handler.py
index 5c875061c441..84c42cd729f8 100644
--- a/awscli/customizations/s3/s3handler.py
+++ b/awscli/customizations/s3/s3handler.py
@@ -47,6 +47,7 @@
from awscli.customizations.s3.utils import DeleteSourceFileSubscriber
from awscli.customizations.s3.utils import DeleteSourceObjectSubscriber
from awscli.customizations.s3.utils import DeleteCopySourceObjectSubscriber
+from awscli.customizations.s3.utils import CaseConflictCleanupSubscriber
from awscli.compat import get_binary_stdin
@@ -403,6 +404,13 @@ def _add_additional_subscribers(self, subscribers, fileinfo):
if self._cli_params.get('is_move', False):
subscribers.append(DeleteSourceObjectSubscriber(
fileinfo.source_client))
+ if fileinfo.case_conflict_submitted is not None:
+ subscribers.append(
+ CaseConflictCleanupSubscriber(
+ fileinfo.case_conflict_submitted,
+ fileinfo.case_conflict_key,
+ )
+ )
def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
bucket, key = find_bucket_key(fileinfo.src)
diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py
index 47ef10e88eec..f77d7196e488 100644
--- a/awscli/customizations/s3/subcommands.py
+++ b/awscli/customizations/s3/subcommands.py
@@ -34,7 +34,8 @@
S3PathResolver
from awscli.customizations.utils import uni_print
from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
- SizeAndLastModifiedSync, NeverSync
+ SizeAndLastModifiedSync, NeverSync, AlwaysSync
+from awscli.customizations.s3.syncstrategy.caseconflict import CaseConflictSync
from awscli.customizations.s3 import transferconfig
from awscli.utils import resolve_v2_debug_mode
@@ -482,6 +483,33 @@
)
}
+CASE_CONFLICT = {
+ 'name': 'case-conflict',
+ 'choices': [
+ 'ignore',
+ 'skip',
+ 'warn',
+ 'error',
+ ],
+ 'default': 'ignore',
+ 'help_text': (
+ "Configures behavior when attempting to download multiple objects "
+ "whose keys differ only by case, which can cause undefined behavior "
+ "on case-insensitive filesystems. "
+ "This parameter only applies for commands that perform multiple S3 "
+ "to local downloads. "
+ f"See Handling case "
+ "conflicts for details. Valid values are: "
+ "
"
+ "- ``error`` - Raise an error and abort downloads.
"
+ "- ``warn`` - Emit a warning and download the object.
"
+ "- ``skip`` - Skip downloading the object.
"
+ "- ``ignore`` - The default value. Ignore the conflict and "
+ "download the object.
"
+ "
"
+ ),
+ }
+
TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL,
FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE,
SSE, SSE_C, SSE_C_KEY, SSE_KMS_KEY_ID, SSE_C_COPY_SOURCE,
@@ -807,7 +835,8 @@ class CpCommand(S3TransferCommand):
"or "
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
'synopsis': USAGE}] + TRANSFER_ARGS + \
- [METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE]
+ [METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE,
+ CASE_CONFLICT]
class MvCommand(S3TransferCommand):
@@ -817,7 +846,8 @@ class MvCommand(S3TransferCommand):
"or "
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
'synopsis': USAGE}] + TRANSFER_ARGS +\
- [METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS]
+ [METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS,
+ CASE_CONFLICT]
class RmCommand(S3TransferCommand):
@@ -839,7 +869,7 @@ class SyncCommand(S3TransferCommand):
" or "
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
'synopsis': USAGE}] + TRANSFER_ARGS + \
- [METADATA, METADATA_DIRECTIVE]
+ [METADATA, METADATA_DIRECTIVE, CASE_CONFLICT]
class MbCommand(S3Command):
@@ -1004,7 +1034,16 @@ def choose_sync_strategies(self):
# Set the default strategies.
sync_strategies['file_at_src_and_dest_sync_strategy'] = \
SizeAndLastModifiedSync()
- sync_strategies['file_not_at_dest_sync_strategy'] = MissingFileSync()
+ if self._should_handle_case_conflicts():
+ sync_strategies['file_not_at_dest_sync_strategy'] = (
+ CaseConflictSync(
+ on_case_conflict=self.parameters['case_conflict']
+ )
+ )
+ else:
+ sync_strategies['file_not_at_dest_sync_strategy'] = (
+ MissingFileSync()
+ )
sync_strategies['file_not_at_src_sync_strategy'] = NeverSync()
# Determine what strategies to override if any.
@@ -1138,6 +1177,12 @@ def run(self):
'filters': [create_filter(self.parameters)],
'file_info_builder': [file_info_builder],
's3_handler': [s3_transfer_handler]}
+ if self._should_handle_case_conflicts():
+ self._handle_case_conflicts(
+ command_dict,
+ rev_files,
+ rev_generator,
+ )
elif self.cmd == 'rm':
command_dict = {'setup': [files],
'file_generator': [file_generator],
@@ -1150,6 +1195,12 @@ def run(self):
'filters': [create_filter(self.parameters)],
'file_info_builder': [file_info_builder],
's3_handler': [s3_transfer_handler]}
+ if self._should_handle_case_conflicts():
+ self._handle_case_conflicts(
+ command_dict,
+ rev_files,
+ rev_generator,
+ )
files = command_dict['setup']
while self.instructions:
@@ -1215,6 +1266,74 @@ def _map_sse_c_params(self, request_parameters, paths_type):
}
)
+ def _should_handle_case_conflicts(self):
+ return (
+ self.cmd in {'sync', 'cp', 'mv'}
+ and self.parameters.get('paths_type') == 's3local'
+ and self.parameters['case_conflict'] != 'ignore'
+ and self.parameters.get('dir_op')
+ )
+
+ def _handle_case_conflicts(self, command_dict, rev_files, rev_generator):
+ # Objects are not returned in lexicographical order when
+ # operated on S3 Express directory buckets. This is required
+ # for sync operations to behave correctly, which is what
+ # recursive copies and moves fall back to so potential case
+ # conflicts can be detected and handled.
+ if not is_s3express_bucket(
+ split_s3_bucket_key(self.parameters['src'])[0]
+ ):
+ self._modify_instructions_for_case_conflicts(
+ command_dict, rev_files, rev_generator
+ )
+ return
+ # `skip` and `error` are not valid choices in this case because
+ # it's not possible to detect case conflicts.
+ if self.parameters['case_conflict'] not in {'ignore', 'warn'}:
+ raise ValueError(
+ f"`{self.parameters['case_conflict']}` is not a valid value "
+ "for `--case-conflict` when operating on S3 Express "
+ "directory buckets. Valid values: `warn`, `ignore`."
+ )
+ msg = (
+ "warning: Recursive copies/moves from an S3 Express "
+ "directory bucket to a case-insensitive local filesystem "
+ "may result in undefined behavior if there are "
+ "S3 object key names that differ only by case. To disable "
+ "this warning, set the `--case-conflict` parameter to `ignore`. "
+ f"For more information, see {CaseConflictSync.DOC_URI}."
+ )
+ uni_print(msg, sys.stderr)
+
+ def _modify_instructions_for_case_conflicts(
+ self, command_dict, rev_files, rev_generator
+ ):
+ # Command will perform recursive S3 to local downloads.
+ # Checking for potential case conflicts requires knowledge
+ # of local files. Instead of writing a separate validation
+ # mechanism for recursive downloads, we modify the instructions
+ # to mimic a sync command.
+ sync_strategies = {
+ # Local filename exists with exact case match. Always sync
+ # because it's a copy operation.
+ 'file_at_src_and_dest_sync_strategy': AlwaysSync(),
+ # Local filename either doesn't exist or differs only by case.
+ # Let `CaseConflictSync` determine which it is and handle it
+ # according to configured `--case-conflict` parameter.
+ 'file_not_at_dest_sync_strategy': CaseConflictSync(
+ on_case_conflict=self.parameters['case_conflict']
+ ),
+ # Copy is one-way so never sync if not at source.
+ 'file_not_at_src_sync_strategy': NeverSync(),
+ }
+ command_dict['setup'].append(rev_files)
+ command_dict['file_generator'].append(rev_generator)
+ command_dict['filters'].append(create_filter(self.parameters))
+ command_dict['comparator'] = [Comparator(**sync_strategies)]
+ self.instructions.insert(
+ self.instructions.index('file_info_builder'), 'comparator'
+ )
+
class CommandParameters(object):
"""
diff --git a/awscli/customizations/s3/syncstrategy/base.py b/awscli/customizations/s3/syncstrategy/base.py
index 3d6a3a663ae2..a3d653849d75 100644
--- a/awscli/customizations/s3/syncstrategy/base.py
+++ b/awscli/customizations/s3/syncstrategy/base.py
@@ -254,3 +254,12 @@ def determine_should_sync(self, src_file, dest_file):
LOG.debug("syncing: %s -> %s, file does not exist at destination",
src_file.src, src_file.dest)
return True
+
+
+class AlwaysSync(BaseSync):
+ def __init__(self, sync_type='file_at_src_and_dest'):
+ super(AlwaysSync, self).__init__(sync_type)
+
+ def determine_should_sync(self, src_file, dest_file):
+ LOG.debug(f"syncing: {src_file.src} -> {src_file.dest}")
+ return True
diff --git a/awscli/customizations/s3/syncstrategy/caseconflict.py b/awscli/customizations/s3/syncstrategy/caseconflict.py
new file mode 100644
index 000000000000..8c8520a0c793
--- /dev/null
+++ b/awscli/customizations/s3/syncstrategy/caseconflict.py
@@ -0,0 +1,96 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+import logging
+import os
+import sys
+
+from awscli.customizations.s3.syncstrategy.base import BaseSync
+from awscli.customizations.utils import uni_print
+
+LOG = logging.getLogger(__name__)
+
+
+class CaseConflictException(Exception):
+ pass
+
+
+class CaseConflictSync(BaseSync):
+ DOC_URI = (
+ "https://docs.aws.amazon.com/cli/v1/topic/"
+ "s3-case-insensitivity.html"
+ )
+
+ def __init__(
+ self,
+ sync_type='file_not_at_dest',
+ on_case_conflict='ignore',
+ submitted=None,
+ ):
+ super().__init__(sync_type)
+ self._on_case_conflict = on_case_conflict
+ if submitted is None:
+ submitted = set()
+ self._submitted = submitted
+
+ @property
+ def submitted(self):
+ return self._submitted
+
+ def determine_should_sync(self, src_file, dest_file):
+ # `src_file.compare_key` and `dest_file.compare_key` are not equal.
+ # This could mean that they're completely different or differ
+ # only by case. eg, `/tmp/a` and `/tmp/b` versus `/tmp/a` and `/tmp/A`.
+ # If the source file's destination already exists, that means it
+ # differs only by case and the conflict needs to be handled.
+ should_sync = True
+ # Normalize compare key for case sensitivity.
+ lower_compare_key = src_file.compare_key.lower()
+ if lower_compare_key in self._submitted or os.path.exists(
+ src_file.dest
+ ):
+ handler = getattr(self, f"_handle_{self._on_case_conflict}")
+ should_sync = handler(src_file)
+ if should_sync:
+ LOG.debug(f"syncing: {src_file.src} -> {src_file.dest}")
+ self._submitted.add(lower_compare_key)
+ # Set properties so that a subscriber can be created
+ # that removes the key from the set after download finishes.
+ src_file.case_conflict_submitted = self._submitted
+ src_file.case_conflict_key = lower_compare_key
+ return should_sync
+
+ @staticmethod
+ def _handle_ignore(src_file):
+ return True
+
+ @staticmethod
+ def _handle_skip(src_file):
+ msg = (
+ f"warning: Skipping {src_file.src} -> {src_file.dest} "
+ "because a file whose name differs only by case either exists "
+ "or is being downloaded.\n"
+ )
+ uni_print(msg, sys.stderr)
+ return False
+
+ @staticmethod
+ def _handle_warn(src_file):
+ msg = (
+ f"warning: Downloading {src_file.src} -> {src_file.dest} "
+ "despite a file whose name differs only by case either existing "
+ "or being downloaded. This behavior is not defined on "
+ "case-insensitive filesystems and may result in overwriting "
+ "existing files or race conditions between concurrent downloads. "
+ f"For more information, see {CaseConflictSync.DOC_URI}.\n"
+ )
+ uni_print(msg, sys.stderr)
+ return True
+
+ @staticmethod
+ def _handle_error(src_file):
+ msg = (
+ f"Failed to download {src_file.src} -> {src_file.dest} "
+ "because a file whose name differs only by case either exists "
+ "or is being downloaded."
+ )
+ raise CaseConflictException(msg)
diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py
index 0c18c3a7e76e..2d22e58daf4e 100644
--- a/awscli/customizations/s3/utils.py
+++ b/awscli/customizations/s3/utils.py
@@ -690,6 +690,20 @@ def _on_failure(self, future, e):
pass
+class CaseConflictCleanupSubscriber(BaseSubscriber):
+ """
+ A subscriber which removes object compare key from case conflict set
+ when download finishes.
+ """
+
+ def __init__(self, submitted, case_conflict_key):
+ self._submitted = submitted
+ self._key = case_conflict_key
+
+ def on_done(self, future, **kwargs):
+ self._submitted.discard(self._key)
+
+
class DeleteSourceSubscriber(OnDoneFilteredSubscriber):
"""A subscriber which deletes the source of the transfer."""
def _on_success(self, future):
diff --git a/awscli/testutils.py b/awscli/testutils.py
index b92dcd21b5dc..46d8d4143133 100644
--- a/awscli/testutils.py
+++ b/awscli/testutils.py
@@ -27,7 +27,9 @@
import logging
import os
import platform
+import random
import shutil
+import string
import sys
import tempfile
import time
@@ -36,6 +38,7 @@
from pprint import pformat
from subprocess import PIPE, Popen
from unittest import mock
+from pathlib import Path
import botocore.loaders
from botocore.awsrequest import AWSResponse
@@ -49,6 +52,11 @@
INTEG_LOG = logging.getLogger('awscli.tests.integration')
AWS_CMD = None
+with tempfile.TemporaryDirectory() as tmpdir:
+ with open(Path(tmpdir) / 'aws-cli-tmp-file', 'w') as f:
+ pass
+ CASE_INSENSITIVE = (Path(tmpdir) / 'AWS-CLI-TMP-FILE').exists()
+
def skip_if_windows(reason):
"""Decorator to skip tests that should not be run on windows.
@@ -69,6 +77,15 @@ def decorator(func):
return decorator
+def skip_if_case_sensitive():
+ def decorator(func):
+ return unittest.skipIf(
+ not CASE_INSENSITIVE,
+ "This test requires a case-insensitive filesystem."
+ )(func)
+ return decorator
+
+
def create_clidriver():
driver = awscli.clidriver.create_clidriver()
session = driver.session
diff --git a/awscli/topics/s3-case-insensitivity.rst b/awscli/topics/s3-case-insensitivity.rst
new file mode 100644
index 000000000000..8437ac416263
--- /dev/null
+++ b/awscli/topics/s3-case-insensitivity.rst
@@ -0,0 +1,105 @@
+:title: AWS CLI S3 Case-Insensitivity
+:description: Using 'aws s3' commands on case-insensitive filesystems
+:category: S3
+:related command: s3 cp, s3 sync, s3 mv
+
+
+This page explains how to detect and handle potential case conflicts when
+downloading multiple objects from S3 to a local case-insensitive filesystem
+using a single AWS CLI command.
+
+Case conflicts
+==============
+S3 object keys are case-sensitive meaning that a bucket can have a set of
+key names that differ only by case, for example, ``a.txt`` and ``A.txt``.
+
+The AWS CLI offers high-level S3 commands that manage transfers of
+multiple S3 objects using a single command:
+
+* ``aws s3 sync``
+* ``aws s3 cp --recursive``
+* ``aws s3 mv --recursive``
+
+Case conflicts can occur on case-insensitive filesystems when an S3 bucket
+has multiple objects whose keys differ only by case and a single AWS CLI
+command is called to download multiple S3 objects **OR** a local file
+already exists whose name differs only by case.
+
+For example, consider an S3 bucket with the following stored objects:
+
+* ``a.txt``
+* ``A.txt``
+
+When the following AWS CLI command is called, the AWS CLI will submit
+requests to download ``a.txt`` and ``A.txt``. Since only
+one can exist on a case-insensitive filesystem, the last download to finish
+will be the file that's locally available.
+
+.. code-block::
+
+ aws s3 sync s3://examplebucket ./mylocaldir
+
+Detecting and handling case conflicts
+=====================================
+To detect and handle case conflicts, you can specify the ``--case-conflict``
+parameter. The following values are valid options:
+
+* ``error`` - When a case conflict is detected, the command will immediately
+ fail and abort in-progress downloads.
+* ``warn`` - When a case conflict is detected, the AWS CLI will
+ display a warning.
+* ``skip`` - When a case conflict is detected, the command will skip
+ downloading the object and continue and display a warning.
+* ``ignore`` - (Default) Case conflicts will not be detected or handled.
+
+
+Continuing the prior example, the following describes what happens when
+appending the ``--case-conflict`` parameter with possible values:
+
+``--case-conflict error``
+
+1. Submit a download request for ``A.txt``.
+2. Detect that ``a.txt`` conflicts with an object that's been submitted for download.
+3. Throw an error. If ``A.txt`` finished downloading, it will be locally available. Otherwise, the download request for ``A.txt`` will be aborted.
+
+``--case-conflict warn``
+
+1. Submit a download request for ``A.txt``.
+2. Detect that ``a.txt`` conflicts with an object that's been submitted for download.
+3. Display a warning.
+4. Submit a download request for ``a.txt``, downloading ``A.txt`` and ``a.txt`` in parallel.
+
+``--case-conflict skip``
+
+1. Submit a download request for ``A.txt``.
+2. Detect that ``a.txt`` conflicts with an object that's been submitted for download.
+3. Skip downloading ``a.txt`` and continue.
+
+``--case-conflict ignore``
+
+1. Submit a download request for ``A.txt``.
+2. Submit a download request for ``a.txt``, downloading ``A.txt`` and ``a.txt`` in parallel.
+
+If your local filesystem is case-sensitive, there's no need to detect and
+handle case conflicts. We recommend setting ``--case-conflict ignore``
+in this case.
+
+S3 Express directory buckets
+============================
+Detecting case conflicts is **NOT** supported when the source is an S3 Express
+directory bucket. When operating on directory buckets, valid values for the
+``--case-conflict`` parameter are:
+
+* ``warn``
+* ``ignore``
+
+The following values are invalid when operating on directory buckets:
+
+* ``error``
+* ``skip``
+
+For example, calling the following command will fail:
+
+.. code-block::
+
+ aws s3 cp s3://mydirbucket--usw2-az1--x-s3 ./mylocaldir --recursive --case-conflict error
\ No newline at end of file
diff --git a/awscli/topics/topic-tags.json b/awscli/topics/topic-tags.json
index 9d3a684bb06b..765424a8a6f1 100644
--- a/awscli/topics/topic-tags.json
+++ b/awscli/topics/topic-tags.json
@@ -69,5 +69,21 @@
"title": [
"AWS CLI S3 FAQ"
]
+ },
+ "s3-case-insensitivity": {
+ "category": [
+ "S3"
+ ],
+ "description": [
+ "Using 'aws s3' commands on case-insensitive filesystems"
+ ],
+ "related command": [
+ "s3 cp",
+ "s3 sync",
+ "s3 mv"
+ ],
+ "title": [
+ "AWS CLI S3 Case-Insensitivity"
+ ]
}
}
\ No newline at end of file
diff --git a/tests/functional/s3/test_cp_command.py b/tests/functional/s3/test_cp_command.py
index 6b13278751b0..507d133a32e2 100644
--- a/tests/functional/s3/test_cp_command.py
+++ b/tests/functional/s3/test_cp_command.py
@@ -13,11 +13,12 @@
# language governing permissions and limitations under the License.
import os
-from awscli.testutils import BaseAWSCommandParamsTest
+from awscli.testutils import BaseAWSCommandParamsTest, skip_if_windows
from awscli.testutils import capture_input
from awscli.testutils import mock
from awscli.compat import BytesIO
from tests.functional.s3 import BaseS3TransferCommandTest
+from tests.functional.s3.test_sync_command import TestSyncCaseConflict
from tests import requires_crt
@@ -1313,3 +1314,55 @@ def test_accepts_mrap_arns_with_slash(self):
self.put_object_request(mrap_arn, 'mykey')
]
)
+
+
+class TestCpRecursiveCaseConflict(TestSyncCaseConflict):
+ prefix = 's3 cp --recursive '
+
+ def test_ignore_by_default(self):
+ self.files.create_file(self.lower_key, 'mycontent')
+ # Note there's no --case-conflict param.
+ cmd = f"{self.prefix} s3://bucket {self.files.rootdir}"
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key]),
+ self.get_object_response(),
+ ]
+ # Expect success, so not error mode.
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ # No warnings in stderr, so not warn or skip mode.
+ assert not stderr
+
+
+class TestS3ExpressCpRecursive(BaseCPCommandTest):
+ prefix = 's3 cp --recursive '
+
+ def test_s3_express_error_raises_exception(self):
+ cmd = (
+ f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} "
+ "--case-conflict error"
+ )
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=255)
+ assert "`error` is not a valid value" in stderr
+
+ def test_s3_express_skip_raises_exception(self):
+ cmd = (
+ f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} "
+ "--case-conflict skip"
+ )
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=255)
+ assert "`skip` is not a valid value" in stderr
+
+ @skip_if_windows("Can't rename to same file")
+ def test_s3_express_warn_emits_warning(self):
+ cmd = (
+ f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} "
+ "--case-conflict warn"
+ )
+ self.parsed_responses = [
+ self.list_objects_response(['a.txt', 'A.txt']),
+ self.get_object_response(),
+ self.get_object_response(),
+ ]
+
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ assert "warning: Recursive copies/moves" in stderr
diff --git a/tests/functional/s3/test_mv_command.py b/tests/functional/s3/test_mv_command.py
index 5ecd434d9aa8..dcddbad7ca5b 100644
--- a/tests/functional/s3/test_mv_command.py
+++ b/tests/functional/s3/test_mv_command.py
@@ -13,7 +13,9 @@
# language governing permissions and limitations under the License.
from awscli.customizations.s3.utils import S3PathResolver
from awscli.compat import BytesIO
+from awscli.testutils import skip_if_case_sensitive
from tests.functional.s3 import BaseS3TransferCommandTest
+from tests.functional.s3.test_sync_command import TestSyncCaseConflict
from tests import requires_crt
@@ -446,3 +448,84 @@ def test_raises_warning_if_validation_not_set_source(self):
"myaccesspoint/key "
"s3://bucket/key")
self.assert_raises_warning(cmdline)
+
+
+class TestMvRecursiveCaseConflict(TestSyncCaseConflict):
+ prefix = 's3 mv --recursive '
+
+ @skip_if_case_sensitive()
+ def test_warn_with_existing_file(self):
+ self.files.create_file(self.lower_key, 'mycontent')
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict warn"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key]),
+ self.get_object_response(),
+ self.delete_object_response(),
+ ]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ assert f"warning: Downloading bucket/{self.upper_key}" in stderr
+
+ def test_warn_with_case_conflicts_in_s3(self):
+ # This test case becomes very flaky because mv
+ # performs a get and delete operation twice.
+ # Delete is called after the get finishes, but
+ # the order of responses become non-deterministic
+ # when downloading multiple objects. The order
+ # could be [get, get, delete, delete] or
+ # [get, delete, get, delete]. Rather than making
+ # complex changes to patch this behavior, we're
+ # delegating the assertions to the sync and cp
+ # test suites.
+ pass
+
+ def test_skip_with_case_conflicts_in_s3(self):
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict skip"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key, self.lower_key]),
+ self.get_object_response(),
+ self.delete_object_response(),
+ ]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ assert f"warning: Skipping bucket/{self.lower_key}" in stderr
+
+ def test_ignore_with_existing_file(self):
+ self.files.create_file(self.lower_key, 'mycontent')
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict ignore"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key]),
+ self.get_object_response(),
+ self.delete_object_response(),
+ ]
+ self.run_cmd(cmd, expected_rc=0)
+
+ def test_ignore_with_case_conflicts_in_s3(self):
+ pass
+
+
+class TestS3ExpressMvRecursive(BaseS3TransferCommandTest):
+ prefix = 's3 mv --recursive '
+
+ def test_s3_express_error_raises_exception(self):
+ cmd = (
+ f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} "
+ "--case-conflict error"
+ )
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=255)
+ assert "`error` is not a valid value" in stderr
+
+ def test_s3_express_skip_raises_exception(self):
+ cmd = (
+ f"{self.prefix} s3://bucket--usw2-az1--x-s3 {self.files.rootdir} "
+ "--case-conflict skip"
+ )
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=255)
+ assert "`skip` is not a valid value" in stderr
diff --git a/tests/functional/s3/test_sync_command.py b/tests/functional/s3/test_sync_command.py
index bdcdbfcc29b9..f67cd07f13a4 100644
--- a/tests/functional/s3/test_sync_command.py
+++ b/tests/functional/s3/test_sync_command.py
@@ -12,7 +12,7 @@
# language governing permissions and limitations under the License.
import os
-from awscli.testutils import mock, cd
+from awscli.testutils import cd, mock, skip_if_case_sensitive, skip_if_windows
from awscli.compat import BytesIO
from tests.functional.s3 import BaseS3TransferCommandTest
@@ -430,3 +430,110 @@ def test_compatible_with_sync_with_local_directory_like_directory_bucket(self):
# Just asserting that command validated and made an API call
self.assertEqual(len(self.operations_called), 1)
self.assertEqual(self.operations_called[0][0].name, 'ListObjectsV2')
+
+
+class TestSyncCaseConflict(BaseS3TransferCommandTest):
+ prefix = 's3 sync '
+
+ def setUp(self):
+ super().setUp()
+ self.lower_key = 'a.txt'
+ self.upper_key = 'A.txt'
+
+ @skip_if_case_sensitive()
+ def test_error_with_existing_file(self):
+ self.files.create_file(self.lower_key, 'mycontent')
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict error"
+ )
+ self.parsed_responses = [self.list_objects_response([self.upper_key])]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=1)
+ assert f"Failed to download bucket/{self.upper_key}" in stderr
+
+ def test_error_with_case_conflicts_in_s3(self):
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict error"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key, self.lower_key])
+ ]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=1)
+ assert f"Failed to download bucket/{self.lower_key}" in stderr
+
+ @skip_if_case_sensitive()
+ def test_warn_with_existing_file(self):
+ self.files.create_file(self.lower_key, 'mycontent')
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict warn"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key]),
+ self.get_object_response(),
+ ]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ assert f"warning: Downloading bucket/{self.upper_key}" in stderr
+
+ @skip_if_windows("Can't rename to same file")
+ def test_warn_with_case_conflicts_in_s3(self):
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict warn"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key, self.lower_key]),
+ self.get_object_response(),
+ self.get_object_response(),
+ ]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ assert f"warning: Downloading bucket/{self.lower_key}" in stderr
+
+ @skip_if_case_sensitive()
+ def test_skip_with_existing_file(self):
+ self.files.create_file(self.lower_key, 'mycontent')
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict skip"
+ )
+ self.parsed_responses = [self.list_objects_response([self.upper_key])]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ assert f"warning: Skipping bucket/{self.upper_key}" in stderr
+
+ def test_skip_with_case_conflicts_in_s3(self):
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict skip"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key, self.lower_key]),
+ self.get_object_response(),
+ ]
+ _, stderr, _ = self.run_cmd(cmd, expected_rc=0)
+ assert f"warning: Skipping bucket/{self.lower_key}" in stderr
+
+ def test_ignore_with_existing_file(self):
+ self.files.create_file(self.lower_key, 'mycontent')
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict ignore"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key]),
+ self.get_object_response(),
+ ]
+ self.run_cmd(cmd, expected_rc=0)
+
+ @skip_if_windows("Can't rename to same file")
+ def test_ignore_with_case_conflicts_in_s3(self):
+ cmd = (
+ f"{self.prefix} s3://bucket {self.files.rootdir} "
+ "--case-conflict ignore"
+ )
+ self.parsed_responses = [
+ self.list_objects_response([self.upper_key, self.lower_key]),
+ self.get_object_response(),
+ self.get_object_response(),
+ ]
+ self.run_cmd(cmd, expected_rc=0)
diff --git a/tests/unit/customizations/s3/syncstrategy/test_base.py b/tests/unit/customizations/s3/syncstrategy/test_base.py
index c6dba74b28a3..9e8634d8c13e 100644
--- a/tests/unit/customizations/s3/syncstrategy/test_base.py
+++ b/tests/unit/customizations/s3/syncstrategy/test_base.py
@@ -13,7 +13,7 @@
import datetime
from awscli.customizations.s3.filegenerator import FileStat
-from awscli.customizations.s3.syncstrategy.base import BaseSync, \
+from awscli.customizations.s3.syncstrategy.base import AlwaysSync, BaseSync, \
SizeAndLastModifiedSync, MissingFileSync, NeverSync
from awscli.testutils import mock, unittest
@@ -290,5 +290,32 @@ def test_determine_should_sync(self):
self.assertTrue(should_sync)
+class TestAlwaysSync:
+ def test_determine_should_sync(self):
+ always_sync = AlwaysSync()
+ time = datetime.datetime.now()
+ src_file = FileStat(
+ src='',
+ dest='',
+ compare_key='test.py',
+ size=10,
+ last_update=time,
+ src_type='s3',
+ dest_type='local',
+ operation_name='download',
+ )
+ dest_file = FileStat(
+ src='',
+ dest='',
+ compare_key='test.py',
+ size=10,
+ last_update=time,
+ src_type='local',
+ dest_type='s3',
+ operation_name='',
+ )
+ assert always_sync.determine_should_sync(src_file, dest_file) is True
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/customizations/s3/syncstrategy/test_caseconflict.py b/tests/unit/customizations/s3/syncstrategy/test_caseconflict.py
new file mode 100644
index 000000000000..536e40f2a9be
--- /dev/null
+++ b/tests/unit/customizations/s3/syncstrategy/test_caseconflict.py
@@ -0,0 +1,176 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+import pytest
+
+from awscli.customizations.s3.filegenerator import FileStat
+from awscli.customizations.s3.syncstrategy.caseconflict import (
+ CaseConflictException,
+ CaseConflictSync,
+)
+from awscli.testutils import skip_if_case_sensitive
+
+
+@pytest.fixture
+def lower_compare_key():
+ return 'a.txt'
+
+
+@pytest.fixture
+def lower_temp_filepath(lower_compare_key, tmp_path):
+ p = tmp_path / lower_compare_key
+ return p.resolve()
+
+
+@pytest.fixture
+def lower_file_stat(lower_compare_key, lower_temp_filepath):
+ return FileStat(
+ src=f'bucket/{lower_compare_key}',
+ dest=lower_temp_filepath,
+ compare_key=lower_compare_key,
+ )
+
+
+@pytest.fixture
+def upper_compare_key():
+ return 'A.txt'
+
+
+@pytest.fixture
+def upper_temp_filepath(upper_compare_key, tmp_path):
+ p = tmp_path / upper_compare_key
+ p.write_text('foobar')
+ return p.resolve()
+
+
+@pytest.fixture
+def upper_file_stat(upper_compare_key, upper_temp_filepath):
+ return FileStat(
+ src=f'bucket/{upper_compare_key}',
+ dest=upper_temp_filepath,
+ compare_key=upper_compare_key,
+ )
+
+
+@pytest.fixture
+def upper_key(upper_file_stat):
+ return upper_file_stat.compare_key.lower()
+
+
+@pytest.fixture
+def no_conflict_file_stat(tmp_path):
+ return FileStat(
+ src='bucket/foo.txt',
+ # Note that this file was never written to.
+ dest=f'{tmp_path}/foo.txt',
+ compare_key='foo.txt',
+ )
+
+
+class TestCaseConflictSync:
+ def test_error_with_no_conflict_syncs(
+ self, no_conflict_file_stat, lower_file_stat, capsys
+ ):
+ case_conflict = CaseConflictSync(on_case_conflict='error')
+ should_sync = case_conflict.determine_should_sync(
+ no_conflict_file_stat, lower_file_stat
+ )
+ captured = capsys.readouterr()
+ assert should_sync is True
+ assert not captured.err
+
+ @skip_if_case_sensitive()
+ def test_error_with_existing_file(self, lower_file_stat, upper_file_stat):
+ case_conflict_sync = CaseConflictSync(on_case_conflict='error')
+ with pytest.raises(CaseConflictException) as exc:
+ case_conflict_sync.determine_should_sync(
+ lower_file_stat,
+ upper_file_stat,
+ )
+ assert 'Failed to download bucket/a.txt' in str(exc.value)
+
+ def test_error_with_case_conflicts_in_s3(
+ self, lower_file_stat, upper_file_stat, upper_key
+ ):
+ case_conflict_sync = CaseConflictSync(
+ on_case_conflict='error', submitted={upper_key}
+ )
+ with pytest.raises(CaseConflictException) as exc:
+ case_conflict_sync.determine_should_sync(
+ lower_file_stat,
+ upper_file_stat,
+ )
+ assert 'Failed to download bucket/a.txt' in str(exc.value)
+
+ def test_warn_with_no_conflict_syncs(
+ self, no_conflict_file_stat, lower_file_stat, capsys
+ ):
+ case_conflict = CaseConflictSync(on_case_conflict='warn')
+ should_sync = case_conflict.determine_should_sync(
+ no_conflict_file_stat, lower_file_stat
+ )
+ captured = capsys.readouterr()
+ assert should_sync is True
+ assert not captured.err
+
+ @skip_if_case_sensitive()
+ def test_warn_with_existing_file(
+ self, lower_file_stat, upper_file_stat, capsys
+ ):
+ case_conflict_sync = CaseConflictSync(on_case_conflict='warn')
+ should_sync = case_conflict_sync.determine_should_sync(
+ lower_file_stat, upper_file_stat
+ )
+ captured = capsys.readouterr()
+ assert should_sync is True
+ assert 'warning: ' in captured.err
+
+ def test_warn_with_case_conflicts_in_s3(
+ self, lower_file_stat, upper_file_stat, upper_key, capsys
+ ):
+ case_conflict_sync = CaseConflictSync(
+ on_case_conflict='warn', submitted={upper_key}
+ )
+ should_sync = case_conflict_sync.determine_should_sync(
+ lower_file_stat,
+ upper_file_stat,
+ )
+ captured = capsys.readouterr()
+ assert should_sync is True
+ assert 'warning: ' in captured.err
+
+ def test_skip_with_no_conflict_syncs(
+ self, no_conflict_file_stat, lower_file_stat, capsys
+ ):
+ case_conflict = CaseConflictSync(on_case_conflict='skip')
+ should_sync = case_conflict.determine_should_sync(
+ no_conflict_file_stat, lower_file_stat
+ )
+ captured = capsys.readouterr()
+ assert should_sync is True
+ assert not captured.err
+
+ @skip_if_case_sensitive()
+ def test_skip_with_existing_file(
+ self, lower_file_stat, upper_file_stat, capsys
+ ):
+ case_conflict_sync = CaseConflictSync(on_case_conflict='skip')
+ should_sync = case_conflict_sync.determine_should_sync(
+ lower_file_stat, upper_file_stat
+ )
+ captured = capsys.readouterr()
+ assert should_sync is False
+ assert 'warning: Skipping' in captured.err
+
+ def test_skip_with_case_conflicts_in_s3(
+ self, lower_file_stat, upper_file_stat, upper_key, capsys
+ ):
+ case_conflict_sync = CaseConflictSync(
+ on_case_conflict='skip', submitted={upper_key}
+ )
+ should_sync = case_conflict_sync.determine_should_sync(
+ lower_file_stat,
+ upper_file_stat,
+ )
+ captured = capsys.readouterr()
+ assert should_sync is False
+ assert 'warning: Skipping' in captured.err
diff --git a/tests/unit/customizations/s3/test_fileinfobuilder.py b/tests/unit/customizations/s3/test_fileinfobuilder.py
index fdca6babdc11..e1284288a948 100644
--- a/tests/unit/customizations/s3/test_fileinfobuilder.py
+++ b/tests/unit/customizations/s3/test_fileinfobuilder.py
@@ -28,7 +28,9 @@ def test_info_setter(self):
src_type='src_type', dest_type='dest_type',
operation_name='operation_name',
response_data='associated_response_data',
- etag='etag',)]
+ etag='etag',
+ case_conflict_submitted='case_conflict_submitted',
+ case_conflict_key='case_conflict_key',)]
file_infos = info_setter.call(files)
for file_info in file_infos:
attributes = file_info.__dict__.keys()
diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py
index 25fc257fcb21..82d85bf6a25f 100644
--- a/tests/unit/customizations/s3/test_subcommands.py
+++ b/tests/unit/customizations/s3/test_subcommands.py
@@ -503,7 +503,8 @@ def test_run_cp_get(self):
'paths_type': 's3local', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True, 'page_size': None,
- 'is_stream': False, 'source_region': None, 'v2_debug': False}
+ 'is_stream': False, 'source_region': None, 'v2_debug': False,
+ 'case_conflict': 'ignore'}
self.parsed_responses = [{"ETag": "abcd", "ContentLength": 100,
"LastModified": "2014-01-09T20:45:49.000Z"}]
config = RuntimeConfig().build_config()
diff --git a/tests/unit/customizations/s3/test_utils.py b/tests/unit/customizations/s3/test_utils.py
index 3754ffd662b8..96d4e216c44c 100644
--- a/tests/unit/customizations/s3/test_utils.py
+++ b/tests/unit/customizations/s3/test_utils.py
@@ -40,7 +40,7 @@
ProvideLastModifiedTimeSubscriber, DirectoryCreatorSubscriber,
DeleteSourceObjectSubscriber, DeleteSourceFileSubscriber,
DeleteCopySourceObjectSubscriber, NonSeekableStream, CreateDirectoryError,
- S3PathResolver)
+ S3PathResolver, CaseConflictCleanupSubscriber)
from awscli.customizations.s3.results import WarningResult
from tests.unit.customizations.s3 import FakeTransferFuture
from tests.unit.customizations.s3 import FakeTransferFutureMeta
@@ -1207,3 +1207,17 @@ def test_alias_suffixes_dont_match_accesspoint_arns(
def test_has_underlying_s3_path(self, path, expected_has_underlying_s3_path):
has_underlying_s3_path = S3PathResolver.has_underlying_s3_path(path)
assert has_underlying_s3_path == expected_has_underlying_s3_path
+
+
+class TestCaseConflictCleanupSubscriber:
+ def test_on_done_removes_key_from_set(self):
+ submitted = {123, 456}
+ subscriber = CaseConflictCleanupSubscriber(submitted, 123)
+ subscriber.on_done(future=None)
+ assert submitted == {456}
+
+ def test_on_done_handles_missing_key(self):
+ submitted = {456}
+ subscriber = CaseConflictCleanupSubscriber(submitted, 123)
+ subscriber.on_done(future=None)
+ assert submitted == {456}