Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 0 additions & 35 deletions awscli/customizations/s3/s3handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,43 +517,8 @@ def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
def _get_warning_handlers(self):
return [
self._warn_glacier,
self._warn_if_zero_byte_file_exists_with_no_overwrite,
]

def _warn_if_zero_byte_file_exists_with_no_overwrite(self, fileinfo):
"""
Warning handler to skip zero-byte files when no_overwrite is set and file exists.

This method handles the transfer of zero-byte objects when the no-overwrite parameter is specified.
To prevent overwrite, it uses head_object to verify if the object exists at the destination:
If the object is present at destination: skip the file (return True)
If the object is not present at destination: allow transfer (return False)

:type fileinfo: FileInfo
:param fileinfo: The FileInfo object containing transfer details

:rtype: bool
:return: True if file should be skipped, False if transfer should proceed
"""
if not self._cli_params.get('no_overwrite') or (
getattr(fileinfo, 'size') and fileinfo.size > 0
):
return False

bucket, key = find_bucket_key(fileinfo.dest)
client = fileinfo.source_client
try:
client.head_object(Bucket=bucket, Key=key)
LOGGER.debug(
f"warning: skipping {fileinfo.src} -> {fileinfo.dest}, file exists at destination"
)
return True
except ClientError as e:
if e.response['Error']['Code'] == '404':
return False
else:
raise

def _format_src_dest(self, fileinfo):
src = self._format_s3_path(fileinfo.src)
dest = self._format_s3_path(fileinfo.dest)
Expand Down
33 changes: 4 additions & 29 deletions awscli/s3transfer/copies.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@ class CopySubmissionTask(SubmissionTask):
'IfNoneMatch',
]

COPY_OBJECT_ARGS_BLOCKLIST = [
'IfNoneMatch',
]

def _submit(
self, client, config, osutil, request_executor, transfer_future
Expand Down Expand Up @@ -141,24 +138,9 @@ def _submit(
# during a multipart copy.
transfer_future.meta.provide_object_etag(response.get('ETag'))

# Check for ifNoneMatch is enabled and file has content
# Special handling for 0-byte files: Since multipart copy works with object size
# and divides the object into smaller chunks, there's an edge case when the object
# size is zero. This would result in 0 parts being calculated, and the
# CompleteMultipartUpload operation throws a MalformedXML error when transferring
# 0 parts because the XML does not validate against the published schema.
# Therefore, 0-byte files are always handled via single copy request regardless
# of the multipart threshold setting.
should_overwrite = (
call_args.extra_args.get("IfNoneMatch")
and transfer_future.meta.size != 0
)
# If it is less than threshold and ifNoneMatch is not in parameters
# do a regular copy else do multipart copy.
if (
transfer_future.meta.size < config.multipart_threshold
and not should_overwrite
):
# If it is greater than threshold do a multipart copy, otherwise
# do a regular copy object.
if transfer_future.meta.size < config.multipart_threshold:
self._submit_copy_request(
client, config, osutil, request_executor, transfer_future
)
Expand All @@ -175,13 +157,6 @@ def _submit_copy_request(
# Get the needed progress callbacks for the task
progress_callbacks = get_callbacks(transfer_future, 'progress')

# Submit the request of a single copy and make sure it
# does not include any blocked arguments.
copy_object_extra_args = {
param: val
for param, val in call_args.extra_args.items()
if param not in self.COPY_OBJECT_ARGS_BLOCKLIST
}
self._transfer_coordinator.submit(
request_executor,
CopyObjectTask(
Expand All @@ -191,7 +166,7 @@ def _submit_copy_request(
"copy_source": call_args.copy_source,
"bucket": call_args.bucket,
"key": call_args.key,
"extra_args": copy_object_extra_args,
"extra_args": call_args.extra_args,
"callbacks": progress_callbacks,
"size": transfer_future.meta.size,
},
Expand Down
80 changes: 11 additions & 69 deletions tests/functional/s3/test_cp_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,90 +387,32 @@ def test_no_overwrite_flag_multipart_upload_when_object_exists_on_target(
def test_no_overwrite_flag_on_copy_when_small_object_does_not_exist_on_target(
self,
):
cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key1.txt --no-overwrite'
# Set up responses for multipart copy (since no-overwrite always uses multipart)
full_path = self.files.create_file('foo.txt', 'mycontent')
cmdline = '%s %s s3://bucket/key.txt --no-overwrite' % (self.prefix, full_path)
self.parsed_responses = [
self.head_object_response(), # HeadObject to get source metadata
self.create_mpu_response('foo'), # CreateMultipartUpload response
self.upload_part_copy_response(), # UploadPartCopy response
{}, # CompleteMultipartUpload response
{'ETag': '"c8afdb36c52cf4727836669019e69222"'}
]
self.run_cmd(cmdline, expected_rc=0)
# Verify all multipart operations were called
self.assertEqual(len(self.operations_called), 4)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(
self.operations_called[1][0].name, 'CreateMultipartUpload'
)
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
# The only operation we should have called is PutObject.
self.assertEqual(
self.operations_called[3][0].name, 'CompleteMultipartUpload'
len(self.operations_called), 1, self.operations_called
)
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')
self.assertEqual(self.operations_called[0][0].name, 'PutObject')
self.assertEqual(self.operations_called[0][1]['IfNoneMatch'], '*')

def test_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
self,
):
cmdline = f'{self.prefix} s3://bucket1/key.txt s3://bucket/key.txt --no-overwrite'
# Set up responses for multipart copy (since no-overwrite always uses multipart)
self.parsed_responses = [
self.head_object_response(), # HeadObject to get source metadata
self.create_mpu_response('foo'), # CreateMultipartUpload response
self.upload_part_copy_response(), # UploadPartCopy response
self.precondition_failed_error_response(), # CompleteMultipartUpload
{}, # AbortMultipartUpload response
]
self.run_cmd(cmdline, expected_rc=0)
# Verify all multipart operations were called
self.assertEqual(len(self.operations_called), 5)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(
self.operations_called[1][0].name, 'CreateMultipartUpload'
)
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
self.assertEqual(
self.operations_called[3][0].name, 'CompleteMultipartUpload'
)
self.assertEqual(
self.operations_called[4][0].name, 'AbortMultipartUpload'
)
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')

def test_no_overwrite_flag_on_copy_when_zero_size_object_exists_at_destination(
self,
):
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file.txt --no-overwrite'
self.parsed_responses = [
self.head_object_response(
ContentLength=0
), # Source object (zero size)
self.head_object_response(), # Checking the object at destination
self.head_object_response(ContentLength=5),
self.precondition_failed_error_response(),
]
self.run_cmd(cmdline, expected_rc=0)
self.assertEqual(len(self.operations_called), 2)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')

def test_no_overwrite_flag_on_copy_when_zero_size_object_not_exists_at_destination(
self,
):
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file1.txt --no-overwrite'
self.parsed_responses = [
self.head_object_response(
ContentLength=0
), # Source object (zero size)
{
'Error': {'Code': '404', 'Message': 'Not Found'}
}, # At destination object does not exists
self.copy_object_response(), # Copy Request when object does not exists
]
self.run_cmd(cmdline, expected_rc=0)
self.assertEqual(len(self.operations_called), 3)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
self.assertEqual(self.operations_called[2][0].name, 'CopyObject')
self.assertEqual(self.operations_called[1][0].name, 'CopyObject')
self.assertEqual(self.operations_called[1][1]['IfNoneMatch'], '*')

def test_no_overwrite_flag_on_copy_when_large_object_exists_on_target(
self,
Expand Down
77 changes: 13 additions & 64 deletions tests/functional/s3/test_mv_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,26 +423,21 @@ def test_mv_no_overwrite_flag_on_copy_when_small_object_does_not_exist_on_target
# Set up responses for multipart copy (since no-overwrite always uses multipart)
self.parsed_responses = [
self.head_object_response(), # HeadObject to get source metadata
self.create_mpu_response('foo'), # CreateMultipartUpload response
self.upload_part_copy_response(), # UploadPartCopy response
{}, # CompleteMultipartUpload response
self.delete_object_response(), # DeleteObject (for move operation)
self.copy_object_response(),
self.delete_object_response(),
]
self.run_cmd(cmdline, expected_rc=0)
# Verify all multipart copy operations were called
self.assertEqual(len(self.operations_called), 5)
self.assertEqual(len(self.operations_called), 5)
self.assertEqual(len(self.operations_called), 3)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(
self.operations_called[1][0].name, 'CreateMultipartUpload'
self.operations_called[1][0].name, 'CopyObject'
)
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
self.assertEqual(self.operations_called[1][1]['IfNoneMatch'], '*')

self.assertEqual(
self.operations_called[3][0].name, 'CompleteMultipartUpload'
self.operations_called[2][0].name, 'DeleteObject'
)
self.assertEqual(self.operations_called[4][0].name, 'DeleteObject')
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')

def test_mv_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
self,
Expand All @@ -451,65 +446,19 @@ def test_mv_no_overwrite_flag_on_copy_when_small_object_exists_on_target(
# Set up responses for multipart copy (since no-overwrite always uses multipart)
self.parsed_responses = [
self.head_object_response(), # HeadObject to get source metadata
self.create_mpu_response('foo'), # CreateMultipartUpload response
self.upload_part_copy_response(), # UploadPartCopy response
self.precondition_failed_error_response(), # CompleteMultipartUpload response
{}, # AbortMultipart
self.precondition_failed_error_response(), # CopyObject response
]
self.run_cmd(cmdline, expected_rc=0)
# Set up the response to simulate a PreconditionFailed error
self.http_response.status_code = 412
# Verify all multipart copy operations were called
self.assertEqual(len(self.operations_called), 5)
# Verify all copy operations were called
self.assertEqual(len(self.operations_called), 2)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(
self.operations_called[1][0].name, 'CreateMultipartUpload'
)
self.assertEqual(self.operations_called[2][0].name, 'UploadPartCopy')
self.assertEqual(
self.operations_called[3][0].name, 'CompleteMultipartUpload'
)
self.assertEqual(
self.operations_called[4][0].name, 'AbortMultipartUpload'
self.operations_called[1][0].name, 'CopyObject'
)
# Verify the IfNoneMatch condition was set in the CompleteMultipartUpload request
self.assertEqual(self.operations_called[3][1]['IfNoneMatch'], '*')

def test_no_overwrite_flag_on_copy_when_zero_size_object_exists_at_destination(
self,
):
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file.txt --no-overwrite'
self.parsed_responses = [
self.head_object_response(
ContentLength=0
), # Source object (zero size)
self.head_object_response(), # Checking the object at destination
]
self.run_cmd(cmdline, expected_rc=0)
self.assertEqual(len(self.operations_called), 2)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')

def test_no_overwrite_flag_on_copy_when_zero_size_object_not_exists_at_destination(
self,
):
cmdline = f'{self.prefix} s3://bucket1/file.txt s3://bucket2/file1.txt --no-overwrite'
self.parsed_responses = [
self.head_object_response(
ContentLength=0
), # Source object (zero size)
{
'Error': {'Code': '404', 'Message': 'Not Found'}
}, # At destination object does not exists
self.copy_object_response(), # Copy Request when object does not exists
self.delete_object_response(), # Delete Request for move object
]
self.run_cmd(cmdline, expected_rc=0)
self.assertEqual(len(self.operations_called), 4)
self.assertEqual(self.operations_called[0][0].name, 'HeadObject')
self.assertEqual(self.operations_called[1][0].name, 'HeadObject')
self.assertEqual(self.operations_called[2][0].name, 'CopyObject')
self.assertEqual(self.operations_called[3][0].name, 'DeleteObject')
# Verify the IfNoneMatch condition was set in the CopyObject request
self.assertEqual(self.operations_called[1][1]['IfNoneMatch'], '*')

def test_mv_no_overwrite_flag_when_large_object_exists_on_target(self):
cmdline = f'{self.prefix} s3://bucket1/key1.txt s3://bucket/key1.txt --no-overwrite'
Expand Down
Loading