Skip to content

Commit

Permalink
v1.2.7
Browse files Browse the repository at this point in the history
  • Loading branch information
Cuadrix committed Jun 29, 2020
1 parent 80189e6 commit 7962b25
Show file tree
Hide file tree
Showing 9 changed files with 243 additions and 157 deletions.
111 changes: 38 additions & 73 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ Forwards intercepted requests from the browser to Node.js where it handles the r

## Features

- Proxy per page **and** per request
- Supports **(** http, https, socks4, socks5 **)** proxies
- Authentication
- Cookie handling internally
- Proxy per page and proxy per request
- Supports **http**, **https**, **socks4** and **socks5** proxies
- Supports authentication
- Handles cookies

## Installation
```
Expand All @@ -20,9 +20,9 @@ npm i puppeteer-page-proxy
- `pageOrReq` <[object](https://developer.mozilla.org/en-US/docs/Glossary/Object)> 'Page' or 'Request' object to set a proxy for.
- `proxy` <[string](https://developer.mozilla.org/en-US/docs/Glossary/String)|[object](https://developer.mozilla.org/en-US/docs/Glossary/Object)> Proxy to use in the current page.
* Begins with a protocol (e.g. http://, https://, socks://)
* In the case of [proxy per request](https://github.com/Cuadrix/puppeteer-page-proxy#proxy-per-request), this can be an object with optional properites for overriding requests:\
* In the case of [proxy per request](https://github.com/Cuadrix/puppeteer-page-proxy#proxy-per-request), this can be an object with optional properties for overriding requests:\
`url`, `method`, `postData`, `headers`\
See [request.continue](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestcontinueoverrides) for more info about the above properties.
See [httpRequest.continue](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestcontinueoverrides) for more info about the above properties.

#### PageProxy.lookup(page[, lookupService, isJSON, timeout])

Expand All @@ -38,26 +38,14 @@ See [request.continue](https://github.com/puppeteer/puppeteer/blob/master/docs/a
**NOTE:** By default this method expects a response in [JSON](https://en.wikipedia.org/wiki/JSON#Example) format and parses it with [JSON.parse](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse) into a usable JavaScript object. To disable this functionality, set `isJSON` to `false`.

## Usage
#### Proxy per page:
#### Importing:
```js
const puppeteer = require('puppeteer');
const useProxy = require('puppeteer-page-proxy');
```

(async () => {
const site = 'https://example.com';
const proxy = 'http://host:port';
const proxy2 = 'https://host:port';

const browser = await puppeteer.launch({headless: false});

const page = await browser.newPage();
await useProxy(page, proxy);
await page.goto(site);

const page2 = await browser.newPage();
await useProxy(page2, proxy2);
await page2.goto(site);
})();
#### Proxy per page:
```js
await useProxy(page, 'http://127.0.0.1:80');
```
To remove the proxy, omit the argument or pass in a falsy value (e.g. `null`):
```js
Expand All @@ -66,43 +54,31 @@ await useProxy(page, null);

#### Proxy per request:
```js
const puppeteer = require('puppeteer');
const useProxy = require('puppeteer-page-proxy');

(async () => {
const site = 'https://example.com';
const proxy = 'socks://host:port';

const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();

await page.setRequestInterception(true);
page.on('request', async req => {
await useProxy(req, proxy);
});
await page.goto(site);
})();
await page.setRequestInterception(true);
page.on('request', async request => {
await useProxy(request, 'https://127.0.0.1:443');
});
```
The request object itself is passed as the first argument. The proxy can now be changed on every request.

Using it along with other interception methods:
```js
await page.setRequestInterception(true);
page.on('request', async req => {
page.on('request', async request => {
if (request.resourceType() === 'image') {
request.abort();
} else {
await useProxy(req, proxy);
await useProxy(request, 'socks4://127.0.0.1:1080');
}
});
```

Overriding requests:
```js
await page.setRequestInterception(true);
page.on('request', async req => {
await useProxy(req, {
proxy: proxy,
page.on('request', async request => {
await useProxy(request, {
proxy: 'socks5://127.0.0.1:1080',
url: 'https://example.com',
method: 'POST',
postData: '404',
Expand All @@ -113,40 +89,29 @@ page.on('request', async req => {
});
```

**NOTE:** It is necessary to set [page.setRequestInterception](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#pagesetrequestinterceptionvalue) to true when setting proxies per request, otherwise the function will fail.
**NOTE:** It is necessary to set [page.setRequestInterception](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagesetrequestinterceptionvalue) to true when setting proxies per request, otherwise the function will fail.

#### Authentication:
#### Authenticating:
```js
const proxy = 'https://login:pass@host:port';
const proxy = 'https://user:pass@host:port';
```

#### Lookup IP used by proxy:
#### IP lookup:
```js
const puppeteer = require('puppeteer');
const useProxy = require('puppeteer-page-proxy');

(async () => {
const site = 'https://example.com';
const proxy1 = 'http://host:port';
const proxy2 = 'https://host:port';

const browser = await puppeteer.launch({headless: false});

// 1
const page1 = await browser.newPage();
await useProxy(page1, proxy1);
let data = await useProxy.lookup(page1); // Waits until done, 'then' continues
console.log(data.ip);
await page1.goto(site);
// 1. Waits until done, 'then' continues
const data = await useProxy.lookup(page1);
console.log(data.ip);

// 2
const page2 = await browser.newPage();
await useProxy(page2, proxy2);
useProxy.lookup(page2).then(data => { // Executes and 'comes back' once done
console.log(data.ip);
});
await page2.goto(site);
})();
// 2. Executes and 'comes back' once done
useProxy.lookup(page2).then(data => {
console.log(data.ip);
});
```
In case of any [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) errors, use the `--disable-web-security` launch flag:
```js
const browser = await puppeteer.launch({
args: ['--disable-web-security']
});
```

## FAQ
Expand All @@ -156,7 +121,7 @@ It takes over the task of requesting content **from** the browser to do it inter

#### Why am I getting _"Request is already handled!"_?

This happens when there is an attempt to handle the same request more than once. An intercepted request is handled by either [request.abort](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestaborterrorcode), [request.continue](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestcontinueoverrides) or [request.respond](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestrespondresponse) methods. Each of these methods 'send' the request to its destination. A request that has already reached its destination cannot be intercepted or handled.
This happens when there is an attempt to handle the same request more than once. An intercepted request is handled by one of the [httpRequest.abort](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestaborterrorcode), [httpRequest.continue](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestcontinueoverrides) or [httpRequest.respond](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestrespondresponse) methods. Each of these methods 'sends' the request to its destination. A request that has already reached its destination cannot be intercepted or handled.


#### Why does the browser show _"Your connection to this site is not secure"_?
Expand Down
5 changes: 5 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# Change log
### [1.2.7] - 2020-06-30
#### Changes
- Reimplemented cookie handling to account for deletion and addition of browser cookies
- Changed default lookup fetch source to **api64.ipify.org**
- Updated documentation
### [1.2.6] - 2020-06-18
#### Changes
- Updated for Puppeteer's v4.0.0 [breaking changes](https://github.com/puppeteer/puppeteer/releases/tag/v4.0.0) ([#22](https://github.com/Cuadrix/puppeteer-page-proxy/issues/22), [#23](https://github.com/Cuadrix/puppeteer-page-proxy/issues/23))
Expand Down
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "puppeteer-page-proxy",
"description": "Additional Node.js module to use with 'puppeteer' for setting proxies per page basis.",
"version": "1.2.6",
"version": "1.2.7",
"author": "Cuadrix <[email protected]> (https://github.com/Cuadrix)",
"homepage": "https://github.com/Cuadrix/puppeteer-page-proxy",
"main": "./src/index.js",
Expand All @@ -10,8 +10,8 @@
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type" : "git",
"url" : "https://github.com/Cuadrix/puppeteer-page-proxy.git"
"type": "git",
"url": "https://github.com/Cuadrix/puppeteer-page-proxy.git"
},
"keywords": [
"puppeteer",
Expand All @@ -28,4 +28,4 @@
"socks-proxy-agent": "^5.0.0",
"tough-cookie": "^4.0.0"
}
}
}
43 changes: 20 additions & 23 deletions src/core/lookup.js
Original file line number Diff line number Diff line change
@@ -1,39 +1,36 @@
const lookup = async (page, lookupService = "https://api.ipify.org?format=json", isJSON = true, timeout = 30000) => {
const lookup = async (page, lookupService = "https://api64.ipify.org?format=json", isJSON = true, timeout = 30000) => {
const doLookup = async () => {
return await page.evaluate((lookupService, timeout, isJSON) => {
return new Promise((resolve) => {
const req = new XMLHttpRequest();
req.timeout = timeout;
req.onload = () => {
if (req.status >= 200 && req.status <= 299) {
resolve(isJSON ? JSON.parse(req.responseText) : req.responseText);
} else {
resolve(onLookupFailed(`Request from ${window.location.href} to ${lookupService} failed with status code ${req.status}`));
}
const request = new XMLHttpRequest();
request.timeout = timeout;
request.onload = () => {
if (request.status >= 200 && request.status <= 299) {
resolve(isJSON ? JSON.parse(request.responseText) : request.responseText);
} else {resolve(onLookupFailed(
`Request from ${window.location.href} to ` +
`${lookupService} failed with status code ${request.status}`
))}
};
req.ontimeout = (error) => {
resolve(onLookupFailed(`Request from ${window.location.href} to ${lookupService} timed out -> ${req.timeout} ms`));
};
req.open("GET", lookupService, true);
req.send();
request.ontimeout = (error) => {resolve(onLookupFailed(
`Request from ${window.location.href} to ` +
`${lookupService} timed out at ${request.timeout} ms`
))};
request.open("GET", lookupService, true);
request.send();
});
}, lookupService, timeout, isJSON);
};
try {
await page.setBypassCSP(true);
const functionName = "onLookupFailed";
const functionName = "$ppp_on_lookup_failed";
if (!page._pageBindings.has(functionName)) {
await page.exposeFunction(functionName, (reason) => {
console.error(reason);
return;
await page.exposeFunction(functionName, (failReason) => {
console.error(failReason); return;
});
}
return await doLookup();
} catch(error) {
if (error.message.startsWith("Execution context was destroyed")) {
return await doLookup();
}
}
} catch(error) {console.error(error)}
};

module.exports = lookup;
65 changes: 39 additions & 26 deletions src/core/proxy.js
Original file line number Diff line number Diff line change
@@ -1,28 +1,41 @@
const request = require("got");
const type = require("../lib/types");
const cookieJar = require("../lib/cookies");
const {setOverrides, setHeaders, setAgent} = require("../lib/options");
const got = require("got");
const CookieHandler = require("../lib/cookies");
const {setHeaders, setAgent} = require("../lib/options");
const type = require("../util/types");

// Responsible for applying proxy
const proxyHandler = async (req, proxy) => {
const requestHandler = async (request, proxy, overrides = {}) => {
// Reject non http(s) URI schemes
if (!request.url().startsWith("http") && !request.url().startsWith("https")) {
request.continue(); return;
}
const cookieHandler = new CookieHandler(request);
// Request options for Got accounting for overrides
const options = {
cookieJar,
method: req.method(),
body: req.postData(),
headers: setHeaders(req),
cookieJar: await cookieHandler.getCookies(),
method: overrides.method || request.method(),
body: overrides.postData || request.postData(),
headers: overrides.headers || setHeaders(request),
agent: setAgent(proxy),
responseType: "buffer",
maxRedirects: 15,
throwHttpErrors: false
};
try {
const res = await request(req.url(), options);
await req.respond({
status: res.statusCode,
headers: res.headers,
body: res.body
const response = await got(overrides.url || request.url(), options);
// Set cookies manually because "set-cookie" doesn't set all cookies (?)
// Perhaps related to https://github.com/puppeteer/puppeteer/issues/5364
const setCookieHeader = response.headers["set-cookie"];
if (setCookieHeader) {
await cookieHandler.setCookies(setCookieHeader);
response.headers["set-cookie"] = undefined;
}
await request.respond({
status: response.statusCode,
headers: response.headers,
body: response.body
});
} catch(error) {await req.abort()}
} catch(error) {await request.abort()}
};

// For reassigning proxy of page
Expand All @@ -41,7 +54,7 @@ const removeRequestListener = (page, listenerName) => {
};

// Calls this if request object passed
const proxyPerRequest = async (req, data) => {
const proxyPerRequest = async (request, data) => {
let proxy, overrides;
// Separate proxy and overrides
if (type(data) === "object") {
Expand All @@ -51,21 +64,21 @@ const proxyPerRequest = async (req, data) => {
overrides = data;
}
} else {proxy = data}
req = setOverrides(req, overrides);
// Skip request if proxy omitted
if (proxy) {await proxyHandler(req, proxy)}
else {req.continue(overrides)}
if (proxy) {await requestHandler(request, proxy, overrides)}
else {request.continue(overrides)}
};

// Calls this if page object passed
const proxyPerPage = async (page, proxy) => {
await page.setRequestInterception(true);
removeRequestListener(page, "$ppp");
if (proxy) {
page.on("request", $ppp = async (req) => {
await proxyHandler(req, proxy);
});
} else {await page.setRequestInterception(false)}
const listener = "$ppp_request_listener";
removeRequestListener(page, listener);
const f = {[listener]: async (request) => {
await requestHandler(request, proxy);
}};
if (proxy) {page.on("request", f[listener])}
else {await page.setRequestInterception(false)}
};

// Main function
Expand All @@ -74,7 +87,7 @@ const useProxy = async (target, data) => {
if (targetType === "HTTPRequest") {
await proxyPerRequest(target, data);
} else if (targetType === "Page") {
await proxyPerPage(target, data)
await proxyPerPage(target, data);
}
};

Expand Down
Loading

0 comments on commit 7962b25

Please sign in to comment.