Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A Browser closed issue #549

Open
defaul0t opened this issue May 14, 2023 · 3 comments
Open

A Browser closed issue #549

defaul0t opened this issue May 14, 2023 · 3 comments

Comments

@defaul0t
Copy link

Unhandled error: Browser closed unexpectedly.

my code

from asyncio import events
import uvloop
import requests
import asyncio, time
import re
import argparse
import sys
import threading
from requests_html import AsyncHTMLSession, HTMLSession
import urllib3
from pyppeteer import launch
import hashlib
import os

# Silence the InsecureRequestWarning that every verify=False request would print.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Default request headers sent with every fetch.
# NOTE(review): 'Content-Encoding' is a *response* header; presumably
# 'Accept-Encoding' was intended — confirm before relying on it.
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
'Content-Encoding': 'gzip'
}

# Install uvloop as the asyncio event-loop implementation.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

def get_url(url_txt):
    """Read *url_txt* and return its lines as a list, whitespace-stripped."""
    with open(url_txt, "r") as fh:
        return [line.strip() for line in fh]

def output_data(i, out_name):
    """Append the record *i* plus a trailing newline to *out_name* (UTF-8)."""
    with open(out_name, "a", encoding="utf-8") as sink:
        sink.write(i + "\n")

def get_md5_value(src):
    """Return the hexadecimal MD5 digest of the string *src* (UTF-8 encoded)."""
    return hashlib.md5(src.encode("utf8")).hexdigest()

async def process_data(sem, s, i, None_data_list):
    """Fetch URL *i* through session *s*, render it, and append a result row
    [status_code, title, content_length, body_md5, url] to *None_data_list*.

    Concurrency is bounded by the semaphore *sem*. Per-URL failures are
    printed and swallowed so one bad URL does not abort the whole batch.
    """
    async with sem:
        try:
            r = await s.get(url=i, timeout=30, headers=headers, verify=False)

            # Render JavaScript via the headless browser; generous waits
            # because some of the scanned hosts are very slow.
            await r.html.arender(wait=30, sleep=30, timeout=30, retries=1)
            content_length = len(r.content)
            code = r.status_code
            content = r.html.html.replace('\r', '').replace('\n', '').replace(' ', '')
            body_md5 = get_md5_value(str(content))

            # Title extraction: regex on the flattened HTML first, then the
            # parser's <title> element, else record the URL as title-less.
            if '<title>' in content:
                title = re.findall('(?<=<title>)(.+?)(?=</title>)', content)[0]
            elif r.html.find('title', first=True):
                title = r.html.find('title', first=True).text
            else:
                title = 'None'
                output_data(i, 'real_None.txt')
            print(f'{i} {r.status_code}, {title}')
            data = [str(code), str(title), str(content_length), body_md5, str(i)]
            None_data_list.append(data)

        except requests.exceptions.RequestException as e:
            print(f"Request error: {e}")
        except Exception as e:
            # BUGFIX: was `except BaseException`, which also swallowed
            # asyncio.CancelledError and KeyboardInterrupt, preventing clean
            # cancellation/shutdown. Exception is the widest safe net here.
            print(f"Unhandled error: {e}")

async def start_up(urls, None_data_list, timeout_duration=3000):
    """Fan *urls* out through one shared AsyncHTMLSession (max 3 in flight),
    collecting result rows into *None_data_list*.

    Raises asyncio.TimeoutError when the whole batch exceeds
    *timeout_duration* seconds. The session is always closed.
    """
    s = AsyncHTMLSession(verify=False)
    sem = asyncio.Semaphore(3)
    try:
        tasks = (process_data(sem, s, url, None_data_list) for url in urls)
        await asyncio.wait_for(asyncio.gather(*tasks), timeout=timeout_duration)
    finally:
        # BUGFIX: close the session even when wait_for times out or a task
        # raises — otherwise the headless Chromium process leaks, which is
        # what produces the "Browser closed unexpectedly" symptoms.
        await s.close()

def main(urls):
    """Scan *urls*, print timing info, and return the collected result rows.

    Each row is [status_code, title, content_length, body_md5, url].
    Errors are printed rather than raised, so the return value may be a
    partial (or empty) list.
    """
    None_data_list = []
    try:
        start = time.perf_counter()
        print(urls)
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(start_up(urls, None_data_list))
        finally:
            # BUGFIX: the loop was never closed, leaking one event loop
            # (and its resources) per call.
            loop.close()
        end = time.perf_counter()
        print(f'None_Scan : {end - start} ')
        output_data(str(end - start), 'debug_time.txt')
        print('')
    except asyncio.TimeoutError:
        print("Timeout occurred")
    except Exception as e:
        print(e)
    finally:
        print(len(None_data_list))
        # NOTE(review): this kills every chrome process on the host, not just
        # the one this scan launched — consider closing the pyppeteer browser
        # instance instead.
        os.system('pkill -f -9 chrome')
    # BUGFIX: `return` was inside the `finally` block, which silently
    # discards any in-flight exception (including KeyboardInterrupt).
    return None_data_list

test.py
# Driver snippet: two sample URLs pushed through the scanner module.
new_request_None_url = ['http://bi-mokadisplay.tcl.com:83','http://tmsa.cmp.tcl.com:88']

# NOTE(review): assumes `import nonetitle_info` (the module defining main()
# above) appears earlier in test.py — not shown in this excerpt.
update_data_list = nonetitle_info.main(new_request_None_url)

print(update_data_list)

#data_info.none_update(False, update_data_list)

@aehlke
Copy link

aehlke commented May 23, 2023

figure it out?

@ajatkj
Copy link

ajatkj commented Aug 1, 2023

This project uses pyppeteer, which relies on a very old version of Chromium. This is easily fixable — you can check my comment on another issue here.
Let me know if this helps.

@cboin1996
Copy link

cboin1996 commented Apr 17, 2024

I forked this project and updated it to use playwright. see: #573

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants