-
Notifications
You must be signed in to change notification settings - Fork 0
/
nodriver_with_proxy.py
116 lines (99 loc) · 3.29 KB
/
nodriver_with_proxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import asyncio
import inspect
import json
import os
import zipfile
from time import sleep

import nodriver as uc
from bs4 import BeautifulSoup as bs
from nodriver import Config
# Third-party dependencies: pip install nodriver bs4
# Only these two proxy formats are handled; the scheme before "://" selects
# the proxy protocol and can be changed accordingly.
# Format 1 (with credentials): scheme://username:password:host:port
proxy = "http://username:password:host:port"
# Format 2 (no credentials): scheme://host:port
proxy_2 = "http://host:port"
# Manifest (Manifest V2) of the generated Chrome extension; it requests the
# proxy and webRequest permissions that background.js uses.
_PROXY_PLUGIN_MANIFEST = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""

# background.js template. Every %s placeholder receives a complete,
# already-quoted JavaScript string literal, so the template itself carries
# no quotes around the placeholders.
_PROXY_PLUGIN_BACKGROUND_JS = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: %s,
host: %s,
port: parseInt(%s)
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: %s,
password: %s
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
);
"""


def createProxyPlugin(PROTOCOL, PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS):
    """Write a Chrome extension archive that configures an authenticated proxy.

    The archive ``proxy_auth_plugin.zip`` is created in the current working
    directory (an existing file of that name is overwritten). It contains a
    ``manifest.json`` and a ``background.js`` that sets a fixed proxy server
    and answers proxy authentication challenges with the given credentials.

    Args:
        PROTOCOL: Proxy scheme written into the extension, e.g. ``"http"``.
        PROXY_HOST: Proxy host name or address.
        PROXY_PORT: Proxy port (string or int); parsed with ``parseInt`` in JS.
        PROXY_USER: User name supplied on authentication challenges.
        PROXY_PASS: Password supplied on authentication challenges.

    Returns:
        The relative path of the archive that was written.
    """
    # json.dumps yields a quoted, fully escaped string literal that is also
    # valid JavaScript, so quotes or backslashes inside a credential can no
    # longer terminate the literal or inject script into background.js.
    js_literals = tuple(
        json.dumps(str(value))
        for value in (PROTOCOL, PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)
    )
    background_js = _PROXY_PLUGIN_BACKGROUND_JS % js_literals

    pluginfile = 'proxy_auth_plugin.zip'
    with zipfile.ZipFile(pluginfile, 'w') as zp:
        zp.writestr("manifest.json", _PROXY_PLUGIN_MANIFEST)
        zp.writestr("background.js", background_js)
    return pluginfile
def getConfigWithNewProxy(proxy):
    """Build a nodriver ``Config`` whose browser is routed through *proxy*.

    Two shapes of *proxy* are distinguished by the number of colon-separated
    fields after the optional ``scheme://`` prefix:

    * exactly four fields (``username:password:host:port``): an
      authentication extension is generated with ``createProxyPlugin`` and
      registered on the config;
    * anything else: the string is passed unchanged to Chrome through the
      ``--proxy-server`` command-line argument.

    Args:
        proxy: Proxy description string, e.g. ``"http://host:port"``.

    Returns:
        The configured ``Config`` instance.
    """
    config = Config()
    picked_proxy = proxy

    # Split the scheme off once. A value without "scheme://" used to raise
    # IndexError; it is now accepted and treated as an HTTP proxy (Chrome
    # also assumes HTTP for a scheme-less --proxy-server value).
    proxy_protocol, separator, remainder = picked_proxy.partition("://")
    if not separator:
        proxy_protocol, remainder = "http", picked_proxy
    other_parts = remainder.split(':')

    if len(other_parts) == 4:
        proxy_user, proxy_pass, proxy_host, proxy_port = other_parts
        # Keep the password out of stdout / captured logs.
        print("Proxy selected for the session: {}".format(
            "{}://{}:****:{}:{}".format(
                proxy_protocol, proxy_user, proxy_host, proxy_port)))
        createProxyPlugin(proxy_protocol, proxy_host,
                          proxy_port, proxy_user, proxy_pass)
        # createProxyPlugin writes the archive into the working directory.
        config.add_extension(os.path.join(
            os.getcwd(), "proxy_auth_plugin.zip"))
    else:
        print("Proxy selected for the session: {}".format(picked_proxy))
        config.add_argument("--proxy-server={}".format(picked_proxy))

    # Optional tweak, disabled by default:
    # config.add_argument("--blink-settings=imagesEnabled=false")
    return config
async def _stop_browser(driver):
    """Shut *driver* down, awaiting ``stop()`` only if it returns an awaitable."""
    # NOTE(review): Browser.stop() appears to be a plain synchronous method in
    # nodriver, in which case awaiting its None result raises TypeError -
    # confirm against the installed version. Checking the result works for
    # both the synchronous and the coroutine flavour.
    outcome = driver.stop()
    if inspect.isawaitable(outcome):
        await outcome


async def _fetch_soup(proxy_url, url="http://www.google.com"):
    """Load *url* in a fresh browser routed through *proxy_url*; return its parsed HTML."""
    driver = await uc.start(config=getConfigWithNewProxy(proxy_url))
    try:
        # Non-blocking pause: time.sleep() here would freeze the event loop
        # that the browser connection runs on.
        await asyncio.sleep(1)
        page = await driver.get(url)
        page_html = await page.get_content()
        return bs(page_html, 'html.parser')
    finally:
        # Always release the browser, even when navigation or parsing fails.
        await _stop_browser(driver)


async def getPageData():
    """Fetch the Google home page twice, each time through a different proxy.

    A separate browser instance is started for ``proxy`` and then for
    ``proxy_2``; each instance is stopped before the next one starts.

    Returns:
        A list with the two parsed pages (``BeautifulSoup`` objects), in the
        order ``[via proxy, via proxy_2]``.
    """
    first_soup = await _fetch_soup(proxy)
    # now get any element using first_soup

    # start another instance with a different proxy
    second_soup = await _fetch_soup(proxy_2)
    # now get any element using second_soup

    return [first_soup, second_soup]
if __name__ == "__main__":
    # Script entry point: run the scraping coroutine to completion on the
    # event loop returned by uc.loop().
    event_loop = uc.loop()
    event_loop.run_until_complete(getPageData())