Skip to content

Commit

Permalink
fix(StealthyFetcher): Use more dependable response data
Browse files (browse the repository at this point in the history)
  • Loading branch information
D4Vinci committed Dec 25, 2024
1 parent f9b85cf commit 2006be2
Showing 1 changed file with 10 additions and 12 deletions.
22 changes: 10 additions & 12 deletions scrapling/engines/camo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -89,7 +89,7 @@ def fetch(self, url: str) -> Response:

def handle_response(finished_response):
nonlocal final_response
- if finished_response.request.resource_type == "document":
+ if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
final_response = finished_response

with Camoufox(
Expand Down Expand Up @@ -133,7 +133,6 @@ def handle_response(finished_response):
if self.network_idle:
page.wait_for_load_state('networkidle')

- response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
# In case we didn't catch a document type somehow
final_response = final_response if final_response else first_response
# This will be parsed inside `Response`
Expand All @@ -142,15 +141,15 @@ def handle_response(finished_response):
status_text = final_response.status_text or StatusText.get(final_response.status)

response = Response(
- url=final_response.url,
+ url=page.url,
text=page.content(),
- body=response_bytes,
+ body=page.content().encode('utf-8'),
status=final_response.status,
reason=status_text,
encoding=encoding,
cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
- headers=final_response.all_headers(),
- request_headers=final_response.request.all_headers(),
+ headers=first_response.all_headers(),
+ request_headers=first_response.request.all_headers(),
**self.adaptor_arguments
)
page.close()
Expand All @@ -169,7 +168,7 @@ async def async_fetch(self, url: str) -> Response:

async def handle_response(finished_response):
nonlocal final_response
- if finished_response.request.resource_type == "document":
+ if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
final_response = finished_response

async with AsyncCamoufox(
Expand Down Expand Up @@ -213,7 +212,6 @@ async def handle_response(finished_response):
if self.network_idle:
await page.wait_for_load_state('networkidle')

- response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
# In case we didn't catch a document type somehow
final_response = final_response if final_response else first_response
# This will be parsed inside `Response`
Expand All @@ -222,15 +220,15 @@ async def handle_response(finished_response):
status_text = final_response.status_text or StatusText.get(final_response.status)

response = Response(
- url=final_response.url,
+ url=page.url,
text=await page.content(),
- body=response_bytes,
+ body=(await page.content()).encode('utf-8'),
status=final_response.status,
reason=status_text,
encoding=encoding,
cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
- headers=await final_response.all_headers(),
- request_headers=await final_response.request.all_headers(),
+ headers=await first_response.all_headers(),
+ request_headers=await first_response.request.all_headers(),
**self.adaptor_arguments
)
await page.close()
Expand Down

0 comments on commit 2006be2

Please sign in to comment.