Skip to content

Commit

Permalink
fix(PlaywrightFetcher): Use more dependable response data
Browse files Browse the repository at this point in the history
  • Loading branch information
D4Vinci committed Dec 25, 2024
1 parent 3cca3fd commit f9b85cf
Showing 1 changed file with 10 additions and 12 deletions.
22 changes: 10 additions & 12 deletions scrapling/engines/pw.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def fetch(self, url: str) -> Response:

def handle_response(finished_response: PlaywrightResponse):
nonlocal final_response
- if finished_response.request.resource_type == "document":
+ if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
final_response = finished_response

with sync_playwright() as p:
Expand Down Expand Up @@ -252,7 +252,6 @@ def handle_response(finished_response: PlaywrightResponse):
if self.network_idle:
page.wait_for_load_state('networkidle')

- response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
# In case we didn't catch a document type somehow
final_response = final_response if final_response else first_response
# This will be parsed inside `Response`
Expand All @@ -261,15 +260,15 @@ def handle_response(finished_response: PlaywrightResponse):
status_text = final_response.status_text or StatusText.get(final_response.status)

response = Response(
- url=final_response.url,
+ url=page.url,
text=page.content(),
- body=response_bytes,
+ body=page.content().encode('utf-8'),
status=final_response.status,
reason=status_text,
encoding=encoding,
cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
- headers=final_response.all_headers(),
- request_headers=final_response.request.all_headers(),
+ headers=first_response.all_headers(),
+ request_headers=first_response.request.all_headers(),
**self.adaptor_arguments
)
page.close()
Expand All @@ -293,7 +292,7 @@ async def async_fetch(self, url: str) -> Response:

async def handle_response(finished_response: PlaywrightResponse):
nonlocal final_response
- if finished_response.request.resource_type == "document":
+ if finished_response.request.resource_type == "document" and finished_response.request.is_navigation_request():
final_response = finished_response

async with async_playwright() as p:
Expand Down Expand Up @@ -339,7 +338,6 @@ async def handle_response(finished_response: PlaywrightResponse):
if self.network_idle:
await page.wait_for_load_state('networkidle')

- response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
# In case we didn't catch a document type somehow
final_response = final_response if final_response else first_response
# This will be parsed inside `Response`
Expand All @@ -348,15 +346,15 @@ async def handle_response(finished_response: PlaywrightResponse):
status_text = final_response.status_text or StatusText.get(final_response.status)

response = Response(
- url=final_response.url,
+ url=page.url,
text=await page.content(),
- body=response_bytes,
+ body=(await page.content()).encode('utf-8'),
status=final_response.status,
reason=status_text,
encoding=encoding,
cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
- headers=await final_response.all_headers(),
- request_headers=await final_response.request.all_headers(),
+ headers=await first_response.all_headers(),
+ request_headers=await first_response.request.all_headers(),
**self.adaptor_arguments
)
await page.close()
Expand Down

0 comments on commit f9b85cf

Please sign in to comment.