Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v2] Update checksum calculation and validation for vendored botocore #9091

Open
wants to merge 5 commits into
base: request-response-checksum-calculation
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions awscli/botocore/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@
from botocore.utils import (
SAFE_CHARS,
ArnParser,
conditionally_calculate_checksum,
conditionally_calculate_md5,
hyphenize_service_id,
is_global_accesspoint,
percent_encode,
Expand Down Expand Up @@ -1151,6 +1149,17 @@ def _update_status_code(response, **kwargs):
http_response.status_code = parsed_status_code


def handle_request_validation_mode_member(params, model, **kwargs):
    """Opt the request into response checksum validation when supported.

    If the operation model's ``http_checksum`` mapping names a
    ``requestValidationModeMember`` and the client config's
    ``response_checksum_validation`` is ``"when_supported"``, that member is
    set to ``"ENABLED"`` in ``params`` unless the caller already supplied a
    value for it.

    :param params: Mutable mapping of operation parameters; updated in place.
    :param model: Operation model exposing an ``http_checksum`` mapping.
    :param kwargs: Remaining event keyword arguments. Only ``context`` is
        read, and only for its ``client_config`` entry.
    """
    # ``context`` may be missing or explicitly None; treat both as empty so
    # the lookup below cannot fail on a None context.
    context = kwargs.get("context") or {}
    client_config = context.get("client_config")
    if client_config is None:
        return
    validation_setting = client_config.response_checksum_validation
    mode_member = model.http_checksum.get("requestValidationModeMember")
    if mode_member and validation_setting == "when_supported":
        # setdefault preserves a value the caller passed explicitly.
        params.setdefault(mode_member, "ENABLED")


# This is a list of (event_name, handler).
# When a Session is created, everything in this list will be
# automatically registered with that Session.
Expand All @@ -1177,7 +1186,7 @@ def _update_status_code(response, **kwargs):
('before-parse.s3.*', handle_expires_header),
('before-parse.s3.*', _handle_200_error, REGISTER_FIRST),
('before-parameter-build', generate_idempotent_uuid),

('before-parameter-build', handle_request_validation_mode_member),
('before-parameter-build.s3', validate_bucket_name),
('before-parameter-build.s3', remove_bucket_from_url_paths_from_model),

Expand Down Expand Up @@ -1205,10 +1214,7 @@ def _update_status_code(response, **kwargs):
('before-call.s3', add_expect_header),
('before-call.glacier', add_glacier_version),
('before-call.api-gateway', add_accept_header),
('before-call.s3.PutObject', conditionally_calculate_checksum),
('before-call.s3.UploadPart', conditionally_calculate_md5),
('before-call.s3.DeleteObjects', escape_xml_payload),
('before-call.s3.DeleteObjects', conditionally_calculate_checksum),
('before-call.s3.PutBucketLifecycleConfiguration', escape_xml_payload),
('before-call.glacier.UploadArchive', add_glacier_checksums),
('before-call.glacier.UploadMultipartPart', add_glacier_checksums),
Expand Down
73 changes: 39 additions & 34 deletions awscli/botocore/httpchecksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@
from hashlib import sha1, sha256

from awscrt import checksums as crt_checksums
from botocore.compat import urlparse
from botocore.exceptions import AwsChunkedWrapperError, FlexibleChecksumError
from botocore.response import StreamingBody
from botocore.utils import (
conditionally_calculate_md5,
determine_content_length,
)
from botocore.utils import determine_content_length, has_checksum_header

logger = logging.getLogger(__name__)

DEFAULT_CHECKSUM_ALGORITHM = "CRC32"


class BaseChecksum:
_CHUNK_SIZE = 1024 * 1024
Expand Down Expand Up @@ -246,7 +246,18 @@ def resolve_checksum_context(request, operation_model, params):
def resolve_request_checksum_algorithm(
request, operation_model, params, supported_algorithms=None,
):
# If the header is already set by the customer, skip calculation
if has_checksum_header(request):
return

request_checksum_calculation = request["context"][
"client_config"
].request_checksum_calculation
http_checksum = operation_model.http_checksum
request_checksum_required = (
operation_model.http_checksum_required
or http_checksum.get("requestChecksumRequired")
)
algorithm_member = http_checksum.get("requestAlgorithmMember")
if algorithm_member and algorithm_member in params:
# If the client has opted into using flexible checksums and the
Expand All @@ -259,35 +270,32 @@ def resolve_request_checksum_algorithm(
raise FlexibleChecksumError(
error_msg="Unsupported checksum algorithm: %s" % algorithm_name
)
elif request_checksum_required or (
algorithm_member and request_checksum_calculation == "when_supported"
):
algorithm_name = DEFAULT_CHECKSUM_ALGORITHM.lower()
else:
return

location_type = "header"
if operation_model.has_streaming_input:
# Operations with streaming input must support trailers.
if request["url"].startswith("https:"):
# We only support unsigned trailer checksums currently. As this
# disables payload signing we'll only use trailers over TLS.
location_type = "trailer"

algorithm = {
"algorithm": algorithm_name,
"in": location_type,
"name": "x-amz-checksum-%s" % algorithm_name,
}
location_type = "header"
if (
operation_model.has_streaming_input
and urlparse(request["url"]).scheme == "https"
):
# Operations with streaming input must support trailers.
# We only support unsigned trailer checksums currently. As this
# disables payload signing we'll only use trailers over TLS.
location_type = "trailer"

if algorithm["name"] in request["headers"]:
# If the header is already set by the customer, skip calculation
return
algorithm = {
"algorithm": algorithm_name,
"in": location_type,
"name": f"x-amz-checksum-{algorithm_name}",
}

checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = algorithm
request["context"]["checksum"] = checksum_context
elif operation_model.http_checksum_required or http_checksum.get(
"requestChecksumRequired"
):
# Otherwise apply the old http checksum behavior via Content-MD5
checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = "conditional-md5"
request["context"]["checksum"] = checksum_context
checksum_context = request["context"].get("checksum", {})
checksum_context["request_algorithm"] = algorithm
request["context"]["checksum"] = checksum_context


def apply_request_checksum(request):
Expand All @@ -297,10 +305,7 @@ def apply_request_checksum(request):
if not algorithm:
return

if algorithm == "conditional-md5":
# Special case to handle the http checksum required trait
conditionally_calculate_md5(request)
elif algorithm["in"] == "header":
if algorithm["in"] == "header":
_apply_request_header_checksum(request)
elif algorithm["in"] == "trailer":
_apply_request_trailer_checksum(request)
Expand Down
29 changes: 20 additions & 9 deletions awscli/botocore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2962,6 +2962,7 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'):


def calculate_md5(body, **kwargs):
"""This function has been deprecated, but is kept for backwards compatibility."""
if isinstance(body, (bytes, bytearray)):
binary_md5 = _calculate_md5_from_bytes(body)
else:
Expand All @@ -2970,11 +2971,13 @@ def calculate_md5(body, **kwargs):


def _calculate_md5_from_bytes(body_bytes):
    """This function has been deprecated, but is kept for backwards compatibility.

    Passes ``body_bytes`` to ``get_md5`` and returns the result of calling
    ``digest()`` on the object it produces.
    """
    return get_md5(body_bytes).digest()


def _calculate_md5_from_file(fileobj):
"""This function has been deprecated, but is kept for backwards compatibility."""
start_position = fileobj.tell()
md5 = get_md5()
for chunk in iter(lambda: fileobj.read(1024 * 1024), b''):
Expand All @@ -2990,15 +2993,17 @@ def _is_s3express_request(params):
return endpoint_properties.get('backend') == 'S3Express'


def _has_checksum_header(params):
def has_checksum_header(params):
"""
Checks if a header starting with "x-amz-checksum-" is provided in a request.

This function is considered private and subject to abrupt breaking changes or
removal without prior announcement. Please do not use it directly.
"""
headers = params['headers']
# If a user provided Content-MD5 is present,
# don't try to compute a new one.
if 'Content-MD5' in headers:
return True

# If a header matching the x-amz-checksum-* pattern is present, we
# assume a checksum has already been provided and an md5 is not needed
# assume a checksum has already been provided by the user.
for header in headers:
if CHECKSUM_HEADER_PATTERN.match(header):
return True
Expand All @@ -3007,12 +3012,14 @@ def _has_checksum_header(params):


def conditionally_calculate_checksum(params, **kwargs):
if not _has_checksum_header(params):
"""This function has been deprecated, but is kept for backwards compatibility."""
if not has_checksum_header(params):
conditionally_calculate_md5(params, **kwargs)
conditionally_enable_crc32(params, **kwargs)


def conditionally_enable_crc32(params, **kwargs):
"""This function has been deprecated, but is kept for backwards compatibility."""
checksum_context = params.get('context', {}).get('checksum', {})
checksum_algorithm = checksum_context.get('request_algorithm')
if (
Expand All @@ -3030,15 +3037,19 @@ def conditionally_enable_crc32(params, **kwargs):


def conditionally_calculate_md5(params, **kwargs):
"""Only add a Content-MD5 if the system supports it."""
"""
This function has been deprecated, but is kept for backwards compatibility.

Only add a Content-MD5 if the system supports it.
"""
body = params['body']
checksum_context = params.get('context', {}).get('checksum', {})
checksum_algorithm = checksum_context.get('request_algorithm')
if checksum_algorithm and checksum_algorithm != 'conditional-md5':
# Skip for requests that will have a flexible checksum applied
return

if _has_checksum_header(params):
if has_checksum_header(params):
# Don't add a new header if one is already available.
return

Expand Down
Loading
Loading