Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: properly close streaming requests when not completely consumed #9899

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 51 additions & 37 deletions src/poetry/inspection/lazy_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,15 +428,20 @@ def _stream_response(self, start: int, end: int) -> Response:
headers["Range"] = f"bytes={start}-{end}"
logger.debug("streamed bytes request: %s", headers["Range"])
self._request_count += 1

response = self._session.get(self._url, headers=headers, stream=True)
response.raise_for_status()
if int(response.headers["Content-Length"]) != (end - start + 1):
raise HTTPRangeRequestNotRespectedError(
f"server did not respect byte range request: "
f"requested {end - start + 1} bytes, got "
f"{response.headers['Content-Length']} bytes"
)
return response
try:
response.raise_for_status()
if int(response.headers["Content-Length"]) != (end - start + 1):
raise HTTPRangeRequestNotRespectedError(
f"server did not respect byte range request: "
f"requested {end - start + 1} bytes, got "
f"{response.headers['Content-Length']} bytes"
)
return response
except BaseException:
Tasssadar marked this conversation as resolved.
Show resolved Hide resolved
response.close()
raise

def _fetch_content_range(self, start: int, end: int) -> Iterator[bytes]:
"""Perform a series of HTTP range requests to cover the specified byte range.
Expand All @@ -445,7 +450,8 @@ def _fetch_content_range(self, start: int, end: int) -> Iterator[bytes]:
method must *include* the byte indexed at argument ``end`` (so e.g. ``0-1`` is 2
bytes long, and the range can never be empty).
"""
yield from self._stream_response(start, end).iter_content(CONTENT_CHUNK_SIZE)
with self._stream_response(start, end) as response:
yield from response.iter_content(CONTENT_CHUNK_SIZE)

@contextmanager
def _stay(self) -> Iterator[None]:
Expand Down Expand Up @@ -549,7 +555,7 @@ def _fetch_content_length(self) -> int:
else:
# If we *could* download some file contents, then write them to the end of
# the file and set up our bisect boundaries by hand.
with self._stay():
with self._stay(), tail:
response_length = int(tail.headers["Content-Length"])
assert response_length == min(initial_chunk_size, ret_length)
self.seek(-response_length, io.SEEK_END)
Expand Down Expand Up @@ -600,36 +606,43 @@ def _try_initial_chunk_request(

self._request_count += 1
tail = self._session.get(self._url, headers=headers, stream=True)
tail.raise_for_status()

code = tail.status_code
if code != codes.partial_content:
# According to
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests,
# a 200 OK implies that range requests are not supported,
# regardless of the requested size.
# However, some servers that support negative range requests also return a
# 200 OK if the requested range from the end was larger than the file size.
if code == codes.ok:
accept_ranges = tail.headers.get("Accept-Ranges", None)
content_length = int(tail.headers["Content-Length"])
if accept_ranges == "bytes" and content_length <= initial_chunk_size:
return content_length, tail
try:
tail.raise_for_status()

code = tail.status_code
if code != codes.partial_content:
# According to
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests,
# a 200 OK implies that range requests are not supported,
# regardless of the requested size.
# However, some servers that support negative range requests also return a
# 200 OK if the requested range from the end was larger than the file size.
if code == codes.ok:
accept_ranges = tail.headers.get("Accept-Ranges", None)
content_length = int(tail.headers["Content-Length"])
if (
accept_ranges == "bytes"
and content_length <= initial_chunk_size
):
return content_length, tail

raise HTTPRangeRequestUnsupportedError(
f"did not receive partial content: got code {code}"
)

raise HTTPRangeRequestUnsupportedError(
f"did not receive partial content: got code {code}"
)
if "Content-Range" not in tail.headers:
raise LazyWheelUnsupportedError(
f"file length cannot be determined for {self._url}, "
f"did not receive content range header from server"
)

if "Content-Range" not in tail.headers:
raise LazyWheelUnsupportedError(
f"file length cannot be determined for {self._url}, "
f"did not receive content range header from server"
file_length = self._parse_full_length_from_content_range(
tail.headers["Content-Range"]
)

file_length = self._parse_full_length_from_content_range(
tail.headers["Content-Range"]
)
return (file_length, tail)
return (file_length, tail)
except BaseException:
tail.close()
raise

def _extract_content_length(
self, initial_chunk_size: int
Expand Down Expand Up @@ -683,6 +696,7 @@ def _extract_content_length(
if int(tail.headers["Content-Length"]) > initial_chunk_size or tail.headers.get(
"Content-Range", ""
).startswith("bytes -"):
tail.close()
tail = None
self._domains_without_negative_range.add(domain)
return file_length, tail
Expand Down
16 changes: 11 additions & 5 deletions src/poetry/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,20 +196,26 @@ def _get(self, start: int = 0) -> Response:
headers = {"Accept-Encoding": "Identity"}
if start > 0:
headers["Range"] = f"bytes={start}-"

response = self._session.get(
self._url, stream=True, headers=headers, timeout=REQUESTS_TIMEOUT
)
response.raise_for_status()
return response
try:
response.raise_for_status()
return response
except BaseException:
response.close()
raise

def _iter_content_with_resume(self, chunk_size: int) -> Iterator[bytes]:
fetched_size = 0
retries = 0
while True:
try:
for chunk in self._response.iter_content(chunk_size=chunk_size):
yield chunk
fetched_size += len(chunk)
with self._response:
for chunk in self._response.iter_content(chunk_size=chunk_size):
yield chunk
fetched_size += len(chunk)
except (ChunkedEncodingError, ConnectionError):
if (
retries < self._max_retries
Expand Down
Loading