Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/next-release/enhancement-s3-49696.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "enhancement",
"category": "``s3``",
"description": "Adds new parameter ``--case-conflict`` that configures how case conflicts are handled on case-insensitive filesystems"
}
5 changes: 4 additions & 1 deletion awscli/customizations/s3/filegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ def __init__(self, directory, filename):
class FileStat(object):
def __init__(self, src, dest=None, compare_key=None, size=None,
last_update=None, src_type=None, dest_type=None,
operation_name=None, response_data=None, etag=None):
operation_name=None, response_data=None, etag=None,
case_conflict_submitted=None, case_conflict_key=None,):
self.src = src
self.dest = dest
self.compare_key = compare_key
Expand All @@ -105,6 +106,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
self.operation_name = operation_name
self.response_data = response_data
self.etag = etag
self.case_conflict_submitted = case_conflict_submitted
self.case_conflict_key = case_conflict_key


class FileGenerator(object):
Expand Down
5 changes: 4 additions & 1 deletion awscli/customizations/s3/fileinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
last_update=None, src_type=None, dest_type=None,
operation_name=None, client=None, parameters=None,
source_client=None, is_stream=False,
associated_response_data=None, etag=None):
associated_response_data=None, etag=None,
case_conflict_submitted=None, case_conflict_key=None,):
self.src = src
self.src_type = src_type
self.operation_name = operation_name
Expand All @@ -60,6 +61,8 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
self.is_stream = is_stream
self.associated_response_data = associated_response_data
self.etag = etag
self.case_conflict_submitted = case_conflict_submitted
self.case_conflict_key = case_conflict_key

def is_glacier_compatible(self):
"""Determines if a file info object is glacier compatible
Expand Down
6 changes: 6 additions & 0 deletions awscli/customizations/s3/fileinfobuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ def _inject_info(self, file_base):
file_info_attr['is_stream'] = self._is_stream
file_info_attr['associated_response_data'] = file_base.response_data
file_info_attr['etag'] = file_base.etag
file_info_attr['case_conflict_submitted'] = getattr(
file_base, 'case_conflict_submitted', None
)
file_info_attr['case_conflict_key'] = getattr(
file_base, 'case_conflict_key', None
)

# This is a bit quirky. The below conditional hinges on the --delete
# flag being set, which only occurs during a sync command. The source
Expand Down
8 changes: 8 additions & 0 deletions awscli/customizations/s3/s3handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from awscli.customizations.s3.utils import DeleteSourceFileSubscriber
from awscli.customizations.s3.utils import DeleteSourceObjectSubscriber
from awscli.customizations.s3.utils import DeleteCopySourceObjectSubscriber
from awscli.customizations.s3.utils import CaseConflictCleanupSubscriber
from awscli.compat import get_binary_stdin


Expand Down Expand Up @@ -403,6 +404,13 @@ def _add_additional_subscribers(self, subscribers, fileinfo):
if self._cli_params.get('is_move', False):
subscribers.append(DeleteSourceObjectSubscriber(
fileinfo.source_client))
if fileinfo.case_conflict_submitted is not None:
subscribers.append(
CaseConflictCleanupSubscriber(
fileinfo.case_conflict_submitted,
fileinfo.case_conflict_key,
)
)

def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
bucket, key = find_bucket_key(fileinfo.src)
Expand Down
129 changes: 124 additions & 5 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
S3PathResolver
from awscli.customizations.utils import uni_print
from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
SizeAndLastModifiedSync, NeverSync
SizeAndLastModifiedSync, NeverSync, AlwaysSync
from awscli.customizations.s3.syncstrategy.caseconflict import CaseConflictSync
from awscli.customizations.s3 import transferconfig
from awscli.utils import resolve_v2_debug_mode

Expand Down Expand Up @@ -482,6 +483,33 @@
)
}

# Argument definition for ``--case-conflict``: selects what happens when
# several objects being downloaded have keys that differ only by case.
CASE_CONFLICT = {
    'name': 'case-conflict',
    'choices': ['ignore', 'skip', 'warn', 'error'],
    # ``ignore`` keeps the transfer behaving as if the option did not exist.
    'default': 'ignore',
    'help_text': (
        "Configures behavior when attempting to download multiple objects "
        "whose keys differ only by case, which can cause undefined behavior "
        "on case-insensitive filesystems. "
        "This parameter only applies for commands that perform multiple S3 "
        "to local downloads. "
        f"See <a href='{CaseConflictSync.DOC_URI}'>Handling case "
        "conflicts</a> for details. Valid values are: "
        "<ul>"
        "<li>``error`` - Raise an error and abort downloads.</li>"
        "<li>``warn`` - Emit a warning and download the object.</li>"
        "<li>``skip`` - Skip downloading the object.</li>"
        "<li>``ignore`` - The default value. Ignore the conflict and "
        "download the object.</li>"
        "</ul>"
    ),
}

TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL,
FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE,
SSE, SSE_C, SSE_C_KEY, SSE_KMS_KEY_ID, SSE_C_COPY_SOURCE,
Expand Down Expand Up @@ -807,7 +835,8 @@ class CpCommand(S3TransferCommand):
"or <S3Uri> <S3Uri>"
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
'synopsis': USAGE}] + TRANSFER_ARGS + \
[METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE]
[METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE,
CASE_CONFLICT]


class MvCommand(S3TransferCommand):
Expand All @@ -817,7 +846,8 @@ class MvCommand(S3TransferCommand):
"or <S3Uri> <S3Uri>"
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
'synopsis': USAGE}] + TRANSFER_ARGS +\
[METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS]
[METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS,
CASE_CONFLICT]


class RmCommand(S3TransferCommand):
Expand All @@ -839,7 +869,7 @@ class SyncCommand(S3TransferCommand):
"<LocalPath> or <S3Uri> <S3Uri>"
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
'synopsis': USAGE}] + TRANSFER_ARGS + \
[METADATA, METADATA_DIRECTIVE]
[METADATA, METADATA_DIRECTIVE, CASE_CONFLICT]


class MbCommand(S3Command):
Expand Down Expand Up @@ -1004,7 +1034,16 @@ def choose_sync_strategies(self):
# Set the default strategies.
sync_strategies['file_at_src_and_dest_sync_strategy'] = \
SizeAndLastModifiedSync()
sync_strategies['file_not_at_dest_sync_strategy'] = MissingFileSync()
if self._should_handle_case_conflicts():
sync_strategies['file_not_at_dest_sync_strategy'] = (
CaseConflictSync(
on_case_conflict=self.parameters['case_conflict']
)
)
else:
sync_strategies['file_not_at_dest_sync_strategy'] = (
MissingFileSync()
)
sync_strategies['file_not_at_src_sync_strategy'] = NeverSync()

# Determine what strategies to override if any.
Expand Down Expand Up @@ -1138,6 +1177,12 @@ def run(self):
'filters': [create_filter(self.parameters)],
'file_info_builder': [file_info_builder],
's3_handler': [s3_transfer_handler]}
if self._should_handle_case_conflicts():
self._handle_case_conflicts(
command_dict,
rev_files,
rev_generator,
)
elif self.cmd == 'rm':
command_dict = {'setup': [files],
'file_generator': [file_generator],
Expand All @@ -1150,6 +1195,12 @@ def run(self):
'filters': [create_filter(self.parameters)],
'file_info_builder': [file_info_builder],
's3_handler': [s3_transfer_handler]}
if self._should_handle_case_conflicts():
self._handle_case_conflicts(
command_dict,
rev_files,
rev_generator,
)

files = command_dict['setup']
while self.instructions:
Expand Down Expand Up @@ -1215,6 +1266,74 @@ def _map_sse_c_params(self, request_parameters, paths_type):
}
)

def _should_handle_case_conflicts(self):
    """Tell whether case-conflict handling applies to this command.

    It applies only to ``sync``, ``cp`` and ``mv`` commands that download
    from S3 to the local filesystem with ``--case-conflict`` set to
    something other than ``ignore``.

    :returns: ``False`` when any of the conditions above is not met;
        otherwise the value stored under ``dir_op`` in the parameters
        (``None`` when that key is absent).
    """
    if self.cmd not in {'sync', 'cp', 'mv'}:
        return False
    params = self.parameters
    if params.get('paths_type') != 's3local':
        return False
    if params['case_conflict'] == 'ignore':
        return False
    # The last condition is returned as-is, so callers see its raw value.
    return params.get('dir_op')

def _handle_case_conflicts(self, command_dict, rev_files, rev_generator):
    """Prepare a recursive S3-to-local transfer for case-conflict handling.

    When the source is not an S3 Express directory bucket, the command
    instructions are rewritten by
    ``_modify_instructions_for_case_conflicts``. For S3 Express directory
    buckets conflicts cannot be detected, so only a warning is printed to
    stderr.

    :param command_dict: Mapping of instruction name to the list of
        components for that instruction; forwarded unchanged.
    :param rev_files: Forwarded to
        ``_modify_instructions_for_case_conflicts``.
    :param rev_generator: Forwarded to
        ``_modify_instructions_for_case_conflicts``.
    :raises ValueError: If the source is an S3 Express directory bucket
        and ``--case-conflict`` is neither ``warn`` nor ``ignore``.
    """
    # Objects are not returned in lexicographical order when
    # operated on S3 Express directory buckets. That ordering is required
    # for sync operations to behave correctly, and sync is what recursive
    # copies and moves fall back to so potential case conflicts can be
    # detected and handled.
    source_bucket = split_s3_bucket_key(self.parameters['src'])[0]
    if not is_s3express_bucket(source_bucket):
        self._modify_instructions_for_case_conflicts(
            command_dict, rev_files, rev_generator
        )
        return
    # `skip` and `error` are not valid choices in this case because
    # it's not possible to detect case conflicts.
    if self.parameters['case_conflict'] not in {'ignore', 'warn'}:
        raise ValueError(
            f"`{self.parameters['case_conflict']}` is not a valid value "
            "for `--case-conflict` when operating on S3 Express "
            "directory buckets. Valid values: `warn`, `ignore`."
        )
    # The message is newline-terminated so that whatever is written to
    # stderr next starts on its own line.
    msg = (
        "warning: Recursive copies/moves from an S3 Express "
        "directory bucket to a case-insensitive local filesystem "
        "may result in undefined behavior if there are "
        "S3 object key names that differ only by case. To disable "
        "this warning, set the `--case-conflict` parameter to `ignore`. "
        f"For more information, see {CaseConflictSync.DOC_URI}.\n"
    )
    uni_print(msg, sys.stderr)

def _modify_instructions_for_case_conflicts(
    self, command_dict, rev_files, rev_generator
):
    """Rewrite the instructions so a recursive download mimics a sync.

    Checking for potential case conflicts requires knowledge of the local
    files. Rather than using a separate validation mechanism, the reverse
    file listing and a comparator are added to ``command_dict`` and a
    ``comparator`` step is inserted into ``self.instructions`` just before
    ``file_info_builder``.

    :param command_dict: Mapping of instruction name to component list;
        mutated in place.
    :param rev_files: Appended to ``command_dict['setup']``.
    :param rev_generator: Appended to ``command_dict['file_generator']``.
    """
    # Local filename exists with an exact case match: always sync, since
    # this is a copy operation.
    exact_match_strategy = AlwaysSync()
    # Local filename is either missing or differs only by case;
    # `CaseConflictSync` works out which and applies the configured
    # `--case-conflict` behavior.
    missing_or_conflict_strategy = CaseConflictSync(
        on_case_conflict=self.parameters['case_conflict']
    )
    # A copy is one-way, so nothing is synced for files absent at source.
    absent_at_source_strategy = NeverSync()

    command_dict['setup'].append(rev_files)
    command_dict['file_generator'].append(rev_generator)
    command_dict['filters'].append(create_filter(self.parameters))
    command_dict['comparator'] = [
        Comparator(
            file_at_src_and_dest_sync_strategy=exact_match_strategy,
            file_not_at_dest_sync_strategy=missing_or_conflict_strategy,
            file_not_at_src_sync_strategy=absent_at_source_strategy,
        )
    ]
    builder_position = self.instructions.index('file_info_builder')
    self.instructions.insert(builder_position, 'comparator')


class CommandParameters(object):
"""
Expand Down
9 changes: 9 additions & 0 deletions awscli/customizations/s3/syncstrategy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,12 @@ def determine_should_sync(self, src_file, dest_file):
LOG.debug("syncing: %s -> %s, file does not exist at destination",
src_file.src, src_file.dest)
return True


class AlwaysSync(BaseSync):
    """Sync strategy that unconditionally reports a file should be synced."""

    def __init__(self, sync_type='file_at_src_and_dest'):
        """Register the strategy for ``sync_type``.

        :param sync_type: Passed through to ``BaseSync.__init__``.
        """
        super(AlwaysSync, self).__init__(sync_type)

    def determine_should_sync(self, src_file, dest_file):
        """Log the transfer and return ``True``.

        :param src_file: Source file stat; its ``src`` and ``dest`` are
            logged.
        :param dest_file: Unused.
        :returns: Always ``True``.
        """
        # Lazy %-style arguments defer formatting until the record is
        # actually emitted, matching the other LOG.debug calls in this
        # module.
        LOG.debug("syncing: %s -> %s", src_file.src, src_file.dest)
        return True
96 changes: 96 additions & 0 deletions awscli/customizations/s3/syncstrategy/caseconflict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import logging
import os
import sys

from awscli.customizations.s3.syncstrategy.base import BaseSync
from awscli.customizations.utils import uni_print

LOG = logging.getLogger(__name__)


class CaseConflictException(Exception):
    """Signals that a download was refused because of a case conflict."""


class CaseConflictSync(BaseSync):
    """Sync strategy that applies the ``--case-conflict`` policy.

    Used for source files with no exact-match file at the destination. A
    conflict is assumed when the lowercased compare key has already been
    submitted, or when the destination path already exists.
    """

    DOC_URI = (
        "https://docs.aws.amazon.com/cli/v1/topic/"
        "s3-case-insensitivity.html"
    )

    def __init__(
        self,
        sync_type='file_not_at_dest',
        on_case_conflict='ignore',
        submitted=None,
    ):
        """Create the strategy.

        :param sync_type: Passed through to ``BaseSync.__init__``.
        :param on_case_conflict: Name of the policy; resolved to the
            ``_handle_<name>`` method when a conflict is found.
        :param submitted: Optional set of lowercased compare keys already
            submitted. A new empty set is created when omitted.
        """
        super().__init__(sync_type)
        self._on_case_conflict = on_case_conflict
        self._submitted = set() if submitted is None else submitted

    @property
    def submitted(self):
        """The set of lowercased compare keys submitted so far."""
        return self._submitted

    def determine_should_sync(self, src_file, dest_file):
        """Decide whether ``src_file`` should be downloaded.

        The source and destination compare keys are known to differ, either
        entirely (``/tmp/a`` vs ``/tmp/b``) or only by case (``/tmp/a`` vs
        ``/tmp/A``). If the source file's destination already exists, the
        difference is only by case and the configured handler decides.

        :param src_file: Source file stat. On a positive decision its
            ``case_conflict_submitted`` and ``case_conflict_key`` attributes
            are set.
        :param dest_file: Unused.
        :returns: ``True`` when no conflict is found, otherwise whatever the
            selected handler returns.
        """
        # Lowercase the compare key so keys differing only by case collide.
        folded_key = src_file.compare_key.lower()
        has_conflict = folded_key in self._submitted or os.path.exists(
            src_file.dest
        )
        if has_conflict:
            resolve = getattr(self, f"_handle_{self._on_case_conflict}")
            decision = resolve(src_file)
        else:
            decision = True
        if not decision:
            return decision

        LOG.debug(f"syncing: {src_file.src} -> {src_file.dest}")
        self._submitted.add(folded_key)
        # Expose the set and key on the file so that a subscriber can be
        # created that removes the key once the download finishes.
        src_file.case_conflict_submitted = self._submitted
        src_file.case_conflict_key = folded_key
        return decision

    @staticmethod
    def _handle_ignore(src_file):
        """Proceed with the download without any notice."""
        return True

    @staticmethod
    def _handle_skip(src_file):
        """Print a warning to stderr and decline the download."""
        notice = (
            f"warning: Skipping {src_file.src} -> {src_file.dest} "
            "because a file whose name differs only by case either exists "
            "or is being downloaded.\n"
        )
        uni_print(notice, sys.stderr)
        return False

    @staticmethod
    def _handle_warn(src_file):
        """Print a warning to stderr and proceed with the download."""
        notice = (
            f"warning: Downloading {src_file.src} -> {src_file.dest} "
            "despite a file whose name differs only by case either existing "
            "or being downloaded. This behavior is not defined on "
            "case-insensitive filesystems and may result in overwriting "
            "existing files or race conditions between concurrent downloads. "
            f"For more information, see {CaseConflictSync.DOC_URI}.\n"
        )
        uni_print(notice, sys.stderr)
        return True

    @staticmethod
    def _handle_error(src_file):
        """Abort by raising ``CaseConflictException``."""
        raise CaseConflictException(
            f"Failed to download {src_file.src} -> {src_file.dest} "
            "because a file whose name differs only by case either exists "
            "or is being downloaded."
        )
Loading