diff --git a/src/drivers/npm/cli.js b/src/drivers/npm/cli.js index 5241ec698..ac2c377b0 100755 --- a/src/drivers/npm/cli.js +++ b/src/drivers/npm/cli.js @@ -13,6 +13,7 @@ const aliases = { a: 'userAgent', b: 'batchSize', d: 'debug', + f: 'fast', t: 'delay', h: 'help', H: 'header', @@ -74,6 +75,7 @@ Examples: Options: -b, --batch-size=... Process links in batches -d, --debug Output debug messages + -f, --fast Prioritise speed over accuracy -t, --delay=ms Wait for ms milliseconds between requests -h, --help This text -H, --header Extra header to send with requests diff --git a/src/drivers/npm/driver.js b/src/drivers/npm/driver.js index 3ba42c789..abfbcada2 100644 --- a/src/drivers/npm/driver.js +++ b/src/drivers/npm/driver.js @@ -283,7 +283,11 @@ function analyzeDom(dom, technologies = Wappalyzer.technologies) { } function get(url, options = {}) { - const timeout = options.timeout || 10000 + const timeout = /* get() is a plain function with no driver `this`: read settings from options */ + options.timeout || + (options.fast + ? Math.min(options.maxWait || 10000, 3000) + : options.maxWait || 10000) if (['http:', 'https:'].includes(url.protocol)) { const { get } = url.protocol === 'http:' ? 
http : https @@ -314,7 +318,7 @@ function get(url, options = {}) { } ) .setTimeout(timeout, () => - reject(new Error(`Timeout (${url.href}, ${timeout}ms)`)) + reject(new Error(`Timeout (${url}, ${timeout}ms)`)) ) .on('error', (error) => reject(new Error(error.message))) ) @@ -345,6 +349,7 @@ class Driver { } this.options.debug = Boolean(+this.options.debug) + this.options.fast = Boolean(+this.options.fast) this.options.recursive = Boolean(+this.options.recursive) this.options.probe = String(this.options.probe || '').toLowerCase() === 'basic' @@ -369,7 +374,7 @@ class Driver { } async init() { - for (let attempt = 1; attempt <= 3; attempt++) { + for (let attempt = 1; attempt <= 2; attempt++) { this.log(`Launching browser (attempt ${attempt})...`) try { @@ -385,7 +390,9 @@ class Driver { acceptInsecureCerts: true, args: chromiumArgs, executablePath: CHROMIUM_BIN, - timeout: 5000, + timeout: this.options.fast + ? Math.min(this.options.maxWait, 10000) + : this.options.maxWait, }) } @@ -393,28 +400,20 @@ class Driver { } catch (error) { this.log(error) - if (attempt >= 3) { + if (attempt >= 2) { throw new Error(error.message || error.toString()) } } } - this.browser.on('disconnected', async () => { - this.log('Browser disconnected') + this.browser.on('disconnected', () => { + this.browser = undefined - if (!this.destroyed) { - try { - await this.init() - } catch (error) { - this.log(error) - } - } + this.log('Browser disconnected') }) } async destroy() { - this.destroyed = true - if (this.browser) { try { await sleep(1) @@ -507,8 +506,6 @@ class Site { this.cache = {} this.probed = false - - this.destroyed = false } log(message, source = 'driver', type = 'log') { @@ -544,7 +541,9 @@ class Site { promise, fallback, errorMessage = 'Operation took too long to complete', - maxWait = Math.min(this.options.maxWait, 3000) + maxWait = this.options.fast + ? 
Math.min(this.options.maxWait, 2000) + : this.options.maxWait ) { let timeout = null @@ -579,10 +578,6 @@ class Site { } async goto(url) { - if (this.destroyed) { - return - } - // Return when the URL is a duplicate or maxUrls has been reached if (this.analyzedUrls[url.href]) { return [] @@ -640,14 +635,18 @@ class Site { ) { request.abort('blockedbyclient') } else { - const headers = { - ...request.headers(), - ...this.options.headers, - } - await this.emit('request', { page, request }) - request.continue({ headers }) + if (Object.keys(this.options.headers).length) { + const headers = { + ...request.headers(), + ...this.options.headers, + } + + request.continue({ headers }) + } else { + request.continue() + } } } catch (error) { error.message += ` (${url})` @@ -657,7 +656,7 @@ class Site { }) page.on('response', async (response) => { - if (this.destroyed || !page || page.__closed || page.isClosed()) { + if (!page || page.__closed || page.isClosed()) { return } @@ -745,7 +744,7 @@ class Site { } if (!this.options.noScripts) { - await sleep(1000) + await sleep(this.options.fast ? 1000 : 3000) } // page.on('console', (message) => this.log(message.text())) @@ -810,151 +809,170 @@ class Site { let dom = [] if (html) { - // Links - links = !this.options.recursive - ? [] - : await this.promiseTimeout( + await Promise.all([ + (async () => { + // Links + links = !this.options.recursive + ? 
[] + : await this.promiseTimeout( + ( + await this.promiseTimeout( + page.evaluateHandle(() => + Array.from(document.getElementsByTagName('a')).map( + ({ + hash, + hostname, + href, + pathname, + protocol, + rel, + }) => ({ + hash, + hostname, + href, + pathname, + protocol, + rel, + }) + ) + ), + { jsonValue: () => [] }, + 'Timeout (links)' + ) + ).jsonValue(), + [], + 'Timeout (links)' + ) + })(), + (async () => { + // Text + text = await this.promiseTimeout( ( await this.promiseTimeout( - page.evaluateHandle(() => - Array.from(document.getElementsByTagName('a')).map( - ({ hash, hostname, href, pathname, protocol, rel }) => ({ - hash, - hostname, - href, - pathname, - protocol, - rel, - }) - ) + page.evaluateHandle( + () => + // eslint-disable-next-line unicorn/prefer-text-content + document.body && document.body.innerText ), - { jsonValue: () => [] }, - 'Timeout (links)' + { jsonValue: () => '' }, + 'Timeout (text)' ) ).jsonValue(), - [], - 'Timeout (links)' - ) - - // Text - text = await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle( - () => - // eslint-disable-next-line unicorn/prefer-text-content - document.body && document.body.innerText - ), - { jsonValue: () => '' }, + '', 'Timeout (text)' ) - ).jsonValue(), - '', - 'Timeout (text)' - ) - - // CSS - css = await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle((maxRows) => { - const css = [] + })(), + (async () => { + // CSS + css = await this.promiseTimeout( + ( + await this.promiseTimeout( + page.evaluateHandle((maxRows) => { + const css = [] - try { - if (!document.styleSheets.length) { - return '' - } + try { + if (!document.styleSheets.length) { + return '' + } - for (const sheet of Array.from(document.styleSheets)) { - for (const rules of Array.from(sheet.cssRules)) { - css.push(rules.cssText) + for (const sheet of Array.from(document.styleSheets)) { + for (const rules of Array.from(sheet.cssRules)) { + css.push(rules.cssText) - if (css.length 
>= maxRows) { - break + if (css.length >= maxRows) { + break + } + } } + } catch (error) { + return '' } - } - } catch (error) { - return '' - } - return css.join('\n') - }, this.options.htmlMaxRows), - { jsonValue: () => '' }, + return css.join('\n') + }, this.options.htmlMaxRows), + { jsonValue: () => '' }, + 'Timeout (css)' + ) + ).jsonValue(), + '', 'Timeout (css)' ) - ).jsonValue(), - '', - 'Timeout (css)' - ) + })(), + (async () => { + // Script tags + ;[scriptSrc, scripts] = await this.promiseTimeout( + ( + await this.promiseTimeout( + page.evaluateHandle(() => { + const nodes = Array.from( + document.getElementsByTagName('script') + ) - // Script tags - ;[scriptSrc, scripts] = await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle(() => { - const nodes = Array.from( - document.getElementsByTagName('script') + return [ + nodes + .filter( + ({ src }) => + src && !src.startsWith('data:text/javascript;') + ) + .map(({ src }) => src), + nodes + .map((node) => node.textContent) + .filter((script) => script), + ] + }), + { jsonValue: () => [] }, + 'Timeout (scripts)' ) - - return [ - nodes - .filter( - ({ src }) => - src && !src.startsWith('data:text/javascript;') - ) - .map(({ src }) => src), - nodes - .map((node) => node.textContent) - .filter((script) => script), - ] - }), - { jsonValue: () => [] }, + ).jsonValue(), + [], 'Timeout (scripts)' ) - ).jsonValue(), - [], - 'Timeout (scripts)' - ) - - // Meta tags - meta = await this.promiseTimeout( - ( - await this.promiseTimeout( - page.evaluateHandle(() => - Array.from(document.querySelectorAll('meta')).reduce( - (metas, meta) => { - const key = - meta.getAttribute('name') || meta.getAttribute('property') - - if (key) { - metas[key.toLowerCase()] = metas[key.toLowerCase()] || [] - - metas[key.toLowerCase()].push( - meta.getAttribute('content') - ) - } - - return metas - }, - {} + })(), + (async () => { + // Meta tags + meta = await this.promiseTimeout( + ( + await this.promiseTimeout( + 
page.evaluateHandle(() => + Array.from(document.querySelectorAll('meta')).reduce( + (metas, meta) => { + const key = + meta.getAttribute('name') || + meta.getAttribute('property') + + if (key) { + metas[key.toLowerCase()] = + metas[key.toLowerCase()] || [] + + metas[key.toLowerCase()].push( + meta.getAttribute('content') + ) + } + + return metas + }, + {} + ) + ), + { jsonValue: () => [] }, + 'Timeout (meta)' ) - ), - { jsonValue: () => [] }, + ).jsonValue(), + [], 'Timeout (meta)' ) - ).jsonValue(), - [], - 'Timeout (meta)' - ) - - // JavaScript - js = this.options.noScripts - ? [] - : await this.promiseTimeout(getJs(page), [], 'Timeout (js)') - - // DOM - dom = await this.promiseTimeout(getDom(page), [], 'Timeout (dom)') + })(), + (async () => { + // JavaScript + js = this.options.noScripts + ? [] + : await this.promiseTimeout(getJs(page), [], 'Timeout (js)') + })(), + (async () => { + // DOM + dom = await this.promiseTimeout(getDom(page), [], 'Timeout (dom)') + })(), + ]) } this.cache[url.href] = { @@ -1037,7 +1055,9 @@ class Site { } if (error.message.includes('net::ERR_NAME_NOT_RESOLVED')) { - const newError = new Error(`Hostname could not be resolved (${url})`) + const newError = new Error( + `Hostname could not be resolved (${url.hostname})` + ) newError.code = 'WAPPALYZER_DNS_ERROR' @@ -1253,7 +1273,9 @@ class Site { }), [], 'Timeout (dns)', - Math.min(this.options.maxWait, 15000) + this.options.fast + ? 
Math.min(this.options.maxWait, 15000) + : this.options.maxWait ) } @@ -1452,8 +1474,6 @@ class Site { }) ) - this.destroyed = true - this.log('Site closed') } } diff --git a/src/drivers/npm/package.json b/src/drivers/npm/package.json index 249822619..f9721e5b3 100644 --- a/src/drivers/npm/package.json +++ b/src/drivers/npm/package.json @@ -13,7 +13,7 @@ "software" ], "homepage": "https://www.wappalyzer.com/", - "version": "6.10.65", + "version": "6.10.66", "author": "Wappalyzer", "license": "GPL-3.0", "repository": { diff --git a/src/drivers/webextension/manifest-v2.json b/src/drivers/webextension/manifest-v2.json index 3247d77d7..559752f14 100644 --- a/src/drivers/webextension/manifest-v2.json +++ b/src/drivers/webextension/manifest-v2.json @@ -4,7 +4,7 @@ "author": "Wappalyzer", "homepage_url": "https://www.wappalyzer.com/", "description": "Identify web technologies", - "version": "6.10.65", + "version": "6.10.66", "default_locale": "en", "manifest_version": 2, "icons": { diff --git a/src/drivers/webextension/manifest-v3.json b/src/drivers/webextension/manifest-v3.json index f1d462cc6..1509cf1cd 100644 --- a/src/drivers/webextension/manifest-v3.json +++ b/src/drivers/webextension/manifest-v3.json @@ -4,7 +4,7 @@ "author": "Wappalyzer", "homepage_url": "https://www.wappalyzer.com/", "description": "Identify web technologies", - "version": "6.10.65", + "version": "6.10.66", "default_locale": "en", "manifest_version": 3, "icons": { diff --git a/src/package.json b/src/package.json index ecceb69b7..7591d156f 100644 --- a/src/package.json +++ b/src/package.json @@ -13,7 +13,7 @@ "software" ], "homepage": "https://www.wappalyzer.com/", - "version": "6.10.65", + "version": "6.10.66", "author": "Wappalyzer", "license": "GPL-3.0", "repository": { diff --git a/src/technologies/b.json b/src/technologies/b.json index 5ac38802a..69456c43d 100644 --- a/src/technologies/b.json +++ b/src/technologies/b.json @@ -2871,4 +2871,4 @@ "scriptSrc": "cdn\\.ibdok\\.ir/", "website": 
"https://bdok.ir" } -} +} \ No newline at end of file diff --git a/src/technologies/h.json b/src/technologies/h.json index c6564cc98..fdd55b59c 100644 --- a/src/technologies/h.json +++ b/src/technologies/h.json @@ -389,20 +389,6 @@ "scriptSrc": "cdn\\.blog\\.st-hatena\\.com/", "website": "https://hatenablog.com" }, - "Header Bidding Ai": { - "cats": [ - 36 - ], - "description": "Header Bidding Ai is a provider of an automated and managed header bidding solution. Header bidding cutting-edge technique where publishers offer their ad inventory to many ad exchanges.", - "icon": "Header Bidding Ai.svg", - "scriptSrc": "\\.headerbidding\\.ai/", - "saas": true, - "pricing": [ - "poa", - "recurring" - ], - "website": "https://headerbidding.ai" - }, "HeadJS": { "cats": [ 59 @@ -415,6 +401,20 @@ "scriptSrc": "head\\.(?:core|load)(?:\\.min)?\\.js", "website": "https://headjs.com" }, + "Header Bidding Ai": { + "cats": [ + 36 + ], + "description": "Header Bidding Ai is a provider of an automated and managed header bidding solution. 
Header bidding cutting-edge technique where publishers offer their ad inventory to many ad exchanges.", + "icon": "Header Bidding Ai.svg", + "pricing": [ + "poa", + "recurring" + ], + "saas": true, + "scriptSrc": "\\.headerbidding\\.ai/", + "website": "https://headerbidding.ai" + }, "Headless UI": { "cats": [ 66 diff --git a/src/technologies/s.json b/src/technologies/s.json index 65ccaa28c..71aad35e8 100644 --- a/src/technologies/s.json +++ b/src/technologies/s.json @@ -1955,22 +1955,6 @@ "scriptSrc": "widget\\.sezzle\\.(?:in|com)", "website": "https://sezzle.com/" }, - "shadcn/ui": { - "cats": [ - 66 - ], - "css": [ - "--destructive-foreground" - ], - "description": "shadcn/ui is a component system built with Radix UI and Tailwind CSS.", - "icon": "shadcn-ui.svg", - "oss": true, - "implies": [ - "Radix UI", - "Tailwind CSS" - ], - "website": "https://ui.shadcn.com" - }, "Shaka Player": { "cats": [ 14 @@ -6720,11 +6704,11 @@ ], "description": "Summernote is an open-source JavaScript library that offers a feature-rich WYSIWYG editor for web applications, allowing users to create and edit formatted content in a familiar word processor-like interface.", "icon": "Summernote.svg", + "oss": true, "scriptSrc": [ "/(?:S|s)ummernote(?:\\.min)?\\.js", "/summernote(?:@|-)([\\d\\.]+)/\\;version:\\1" ], - "oss": true, "website": "https://summernote.org" }, "Sumo": { @@ -7310,6 +7294,22 @@ "scriptSrc": "scrollreveal(?:\\.min)(?:\\.js)", "website": "https://scrollrevealjs.org" }, + "shadcn/ui": { + "cats": [ + 66 + ], + "css": [ + "--destructive-foreground" + ], + "description": "shadcn/ui is a component system built with Radix UI and Tailwind CSS.", + "icon": "shadcn-ui.svg", + "implies": [ + "Radix UI", + "Tailwind CSS" + ], + "oss": true, + "website": "https://ui.shadcn.com" + }, "shine.js": { "cats": [ 25 @@ -7416,4 +7416,4 @@ }, "website": "https://styled-components.com" } -} +} \ No newline at end of file