From 9d33192551cf6f87d7add4e2942f86b52d79c440 Mon Sep 17 00:00:00 2001 From: Cuadrix <51061675+Cuadrix@users.noreply.github.com> Date: Mon, 10 Oct 2022 10:24:01 +0200 Subject: [PATCH] v1.2.9 --- changelog.md | 8 ++++++ package.json | 14 +++++----- src/core/lookup.js | 63 ++++++++++++++++++++++++++++++-------------- src/core/proxy.js | 65 ++++++++++++++++++++++------------------------ src/index.d.ts | 10 +++---- src/lib/cdp.js | 2 +- src/lib/cookies.js | 11 +++++--- src/lib/options.js | 4 +-- 8 files changed, 105 insertions(+), 72 deletions(-) diff --git a/changelog.md b/changelog.md index 39863ee..1e76f38 100644 --- a/changelog.md +++ b/changelog.md @@ -1,4 +1,12 @@ # Change log +### [1.2.9] - 2022-10-10 +#### Fixes +- Allow ports in request url for `host` request header ([#61](https://github.com/Cuadrix/puppeteer-page-proxy/issues/61)) ([#62](https://github.com/Cuadrix/puppeteer-page-proxy/pull/62)) +- Take into account how `CDPSession` client is exposed in latest versions of Puppeteer ([#78](https://github.com/Cuadrix/puppeteer-page-proxy/issues/78)) ([#79](https://github.com/Cuadrix/puppeteer-page-proxy/pull/79)) +- Allow domain cookies to be unset ([#48](https://github.com/Cuadrix/puppeteer-page-proxy/issues/48)) ([#48#issuecomment-729802384](https://github.com/Cuadrix/puppeteer-page-proxy/issues/48#issuecomment-729802384)) +- Take into account that `request.frame()` might return `null` ([#36](https://github.com/Cuadrix/puppeteer-page-proxy/issues/36)) ([#43](https://github.com/Cuadrix/puppeteer-page-proxy/issues/43)) ([#59](https://github.com/Cuadrix/puppeteer-page-proxy/issues/59)) ([#36#issuecomment-814520620](https://github.com/Cuadrix/puppeteer-page-proxy/issues/36#issuecomment-814520620)) +- Update differentiation between page and http request objects for latest versions of Puppeteer (`Page` -> `CDPPage`) +- Update `lookup` method for latest versions of Puppeteer ### [1.2.8] - 2020-07-21 #### Changes - Fixed silent failure when there was an invalid host in the cookies set by the server ([#32](https://github.com/Cuadrix/puppeteer-page-proxy/issues/32)) diff --git a/package.json b/package.json index 5ed52af..3529098 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "puppeteer-page-proxy", "description": "Additional Node.js module to use with 'puppeteer' for setting proxies per page basis.", - "version": "1.2.8", + "version": "1.2.9", "author": "Cuadrix (https://github.com/Cuadrix)", "homepage": "https://github.com/Cuadrix/puppeteer-page-proxy", "main": "./src/index.js", @@ -22,10 +22,10 @@ ], "license": "MIT", "dependencies": { - "got": "^11.5.1", - "http-proxy-agent": "^4.0.1", - "https-proxy-agent": "^5.0.0", - "socks-proxy-agent": "^5.0.0", - "tough-cookie": "^4.0.0" + "got": "^11.8.5", + "http-proxy-agent": "^5.0.0", + "https-proxy-agent": "^5.0.1", + "socks-proxy-agent": "^7.0.0", + "tough-cookie": "^4.1.2" } -} \ No newline at end of file +} diff --git a/src/core/lookup.js b/src/core/lookup.js index 2b1d95b..04e9fdc 100644 --- a/src/core/lookup.js +++ b/src/core/lookup.js @@ -1,36 +1,61 @@ -const lookup = async (page, lookupService = "https://api64.ipify.org?format=json", isJSON = true, timeout = 30000) => { +const defaults = { + url: "https://api64.ipify.org?format=json", + json: true, + timeout: 30000 +}; + +const onLookupFail = (message) => {console.error(message)} +const isOnLookupFailExposed = new WeakMap(); + +const lookup = async (page, lookupServiceUrl = defaults.url, isJSON = defaults.json, timeout = defaults.timeout) => { const doLookup = async () => { - return await page.evaluate((lookupService, timeout, isJSON) => { + // Wait for network to be idle before evaluating code in page context + await page.waitForNetworkIdle(); + return await page.evaluate((pageUrl, lookupServiceUrl, timeout, isJSON) => { return new Promise((resolve) => { const request = new XMLHttpRequest(); request.timeout = timeout; request.onload = () => { if (request.status >= 200 && request.status <= 299) { resolve(isJSON ? JSON.parse(request.responseText) : request.responseText); - } else {resolve(onLookupFailed( - `Request from ${window.location.href} to ` + - `${lookupService} failed with status code ${request.status}` - ))} + } else { + // Print message to browser and NodeJS console + const failMessage = + `Lookup request from ${pageUrl} to ${lookupServiceUrl} ` + + `failed with status code ${request.status}`; + console.error(failMessage); + $ppp_onLookupFail(failMessage); + resolve(); + } + }; + request.ontimeout = () => { + // Print message to browser and NodeJS console + const timeOutMessage = + `Lookup request from ${pageUrl} to ${lookupServiceUrl} ` + + `timed out at ${request.timeout} ms`; + console.error(timeOutMessage); + $ppp_onLookupFail(timeOutMessage); + resolve(); }; - request.ontimeout = (error) => {resolve(onLookupFailed( - `Request from ${window.location.href} to ` + - `${lookupService} timed out at ${request.timeout} ms` - ))}; - request.open("GET", lookupService, true); + request.open("GET", lookupServiceUrl, true); request.send(); }); - }, lookupService, timeout, isJSON); + }, page.url(), lookupServiceUrl, timeout, isJSON); }; try { - await page.setBypassCSP(true); - const functionName = "$ppp_on_lookup_failed"; - if (!page._pageBindings.has(functionName)) { - await page.exposeFunction(functionName, (failReason) => { - console.error(failReason); return; - }); + // Expose function to log error on NodeJS side + // Deal with already exposed error by explicitly keeping track of function exposure + if (!isOnLookupFailExposed.get(page)) { + await page.exposeFunction("$ppp_onLookupFail", onLookupFail); + isOnLookupFailExposed.set(page, true); } + // Stop keeping track of exposure if page is closed + if (page.isClosed()) { + isOnLookupFailExposed.delete(page); + } + await page.setBypassCSP(true); return await doLookup(); - } catch(error) {console.error(error)} + } catch(error) {console.log(error)} }; module.exports = lookup; \ No newline at end of file diff --git a/src/core/proxy.js b/src/core/proxy.js index 0445aa1..7a67584 100644 --- a/src/core/proxy.js +++ b/src/core/proxy.js @@ -10,7 +10,7 @@ const requestHandler = async (request, proxy, overrides = {}) => { request.continue(); return; } const cookieHandler = new CookieHandler(request); - // Request options for Got accounting for overrides + // Request options for GOT accounting for overrides const options = { cookieJar: await cookieHandler.getCookies(), method: overrides.method || request.method(), @@ -57,42 +57,39 @@ const removeRequestListener = (page, listenerName) => { } }; -// Calls this if request object passed -const proxyPerRequest = async (request, data) => { - let proxy, overrides; - // Separate proxy and overrides - if (type(data) === "object") { - if (Object.keys(data).length !== 0) { - proxy = data.proxy; - delete data.proxy; - overrides = data; - } - } else {proxy = data} - // Skip request if proxy omitted - if (proxy) {await requestHandler(request, proxy, overrides)} - else {request.continue(overrides)} -}; +const useProxyPer = { + // Call this if request object passed + HTTPRequest: async (request, data) => { + let proxy, overrides; + // Separate proxy and overrides + if (type(data) === "object") { + if (Object.keys(data).length !== 0) { + proxy = data.proxy; + delete data.proxy; + overrides = data; + } + } else {proxy = data} + // Skip request if proxy omitted + if (proxy) {await requestHandler(request, proxy, overrides)} + else {request.continue(overrides)} + }, -// Calls this if page object passed -const proxyPerPage = async (page, proxy) => { - await page.setRequestInterception(true); - const listener = "$ppp_request_listener"; - removeRequestListener(page, listener); - const f = {[listener]: async (request) => { - await requestHandler(request, proxy); - }}; - if (proxy) {page.on("request", f[listener])} - else {await page.setRequestInterception(false)} -}; + // Call this if page object passed + CDPPage: async (page, proxy) => { + await page.setRequestInterception(true); + const listener = "$ppp_requestListener"; + removeRequestListener(page, listener); + const f = {[listener]: async (request) => { + await requestHandler(request, proxy); + }}; + if (proxy) {page.on("request", f[listener])} + else {await page.setRequestInterception(false)} + } +} // Main function const useProxy = async (target, data) => { - const targetType = target.constructor.name; - if (targetType === "HTTPRequest") { - await proxyPerRequest(target, data); - } else if (targetType === "Page") { - await proxyPerPage(target, data); - } + useProxyPer[target.constructor.name](target, data); }; -module.exports = useProxy; +module.exports = useProxy; \ No newline at end of file diff --git a/src/index.d.ts b/src/index.d.ts index 1030a70..4b09b68 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -1,4 +1,4 @@ -export = puppeteer_page_proxy; +export = useProxy; /** * **Set a proxy to use in a given page or request.** * @@ -11,8 +11,8 @@ export = puppeteer_page_proxy; * @param page 'Page' or 'Request' object to set a proxy for. * @param proxy Proxy to use in the current page. Must begin with a protocol e.g. **http://**, **https://**, **socks://**. */ -declare function puppeteer_page_proxy(page: object, proxy: string | object): Promise; -declare namespace puppeteer_page_proxy { +declare function useProxy(page: object, proxy: string | object): Promise; +declare namespace useProxy { /** * **Request data from a lookupservice.** * @@ -23,9 +23,9 @@ declare namespace puppeteer_page_proxy { * console.log(data.ip); * ``` * @param page 'Page' object to execute the request on. - * @param lookupService External lookup service to request data from. Fetches data from `api64.ipify.org` by default. + * @param lookupServiceUrl External lookup service to request data from. Fetches data from `api64.ipify.org` by default. * @param isJSON Whether to JSON.parse the received response. Defaults to `true`. * @param timeout Time in milliseconds after which the request times out. Defaults to `30000` ms. */ - function lookup(page: object, lookupService?: string, isJSON?: boolean, timeout?: number | string): Promise; + function lookup(page: object, lookupServiceUrl?: string, isJSON?: boolean, timeout?: number | string): Promise; } diff --git a/src/lib/cdp.js b/src/lib/cdp.js index 2db3bdc..b366126 100644 --- a/src/lib/cdp.js +++ b/src/lib/cdp.js @@ -15,4 +15,4 @@ class CDP { } } -module.exports = CDP; +module.exports = CDP; \ No newline at end of file diff --git a/src/lib/cookies.js b/src/lib/cookies.js index fe6d2ed..2212190 100644 --- a/src/lib/cookies.js +++ b/src/lib/cookies.js @@ -64,8 +64,11 @@ const formatCookie = (cookie) => { class CookieHandler extends CDP { constructor(request) { super(request._client || request.client); - this.url = request.isNavigationRequest() ? request.url() : request.frame().url(); - this.domain = new URL(this.url).hostname; + this.url = + (request.isNavigationRequest() || request.frame() == null) + ? request.url() + : request.frame().url(); + this.domain = (this.url) ? new URL(this.url).hostname : ""; } // Parse an array of raw cookies to an array of cookie objects parseCookies(rawCookies) { @@ -85,7 +88,7 @@ class CookieHandler extends CDP { const toughCookies = this.formatCookies(browserCookies); // Add cookies to cookieJar const cookieJar = CookieJar.deserializeSync({ - version: 'tough-cookie@4.0.0', + version: 'tough-cookie@4.1.2', storeType: 'MemoryCookieStore', rejectPublicSuffixes: true, cookies: toughCookies @@ -111,4 +114,4 @@ class CookieHandler extends CDP { } } -module.exports = CookieHandler; +module.exports = CookieHandler; \ No newline at end of file diff --git a/src/lib/options.js b/src/lib/options.js index 481defb..f11be7e 100644 --- a/src/lib/options.js +++ b/src/lib/options.js @@ -9,7 +9,7 @@ const setHeaders = (request) => { ...request.headers(), "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", "accept-encoding": "gzip, deflate, br", - "host": new URL(request.url()).host // hostname + port + "host": new URL(request.url()).host } if (request.isNavigationRequest()) { headers["sec-fetch-mode"] = "navigate"; @@ -36,4 +36,4 @@ const setAgent = (proxy) => { }; }; -module.exports = {setHeaders, setAgent}; +module.exports = {setHeaders, setAgent}; \ No newline at end of file