diff --git a/lib/core.js b/lib/core.js index 4dd99bccb..3e3e77bb8 100644 --- a/lib/core.js +++ b/lib/core.js @@ -1437,7 +1437,8 @@ responseStatusCode: 'responseStatusCode', responseError: 'responseError', timeout: 'timeout', - fallback: 'fallback' + fallback: 'fallback', + retry: 'retry' // Make same iframely call with different options. }; function findRedirectError(result) { @@ -1458,6 +1459,21 @@ } } + function findRetryError(result) { + if (result) { + for(var i = 0; i < result.length; i++) { + var r = result[i]; + var retry = r.error && r.error[SYS_ERRORS.retry]; + if (typeof retry === "object") { + log(' -- plugin retry (by "' + r.method.pluginId + '")'); + return retry; + } else if (retry && typeof retry !== "object") { + log(' -- skip plugin retry, not object (by "' + r.method.pluginId + '")', retry); + } + } + } + } + function findResponseError(result, uri) { if (result) { for(var i = 0; i < result.length; i++) { @@ -1739,6 +1755,9 @@ options = {}; } + // Store original to run on retry. + var originalOptions = {...options}; + var initialCallback = cb; var fallbackInfo; @@ -1786,6 +1805,11 @@ return; } + if (options.retriesCount && options.retriesCount > (options.maxRedirects || CONFIG.MAX_REDIRECTS)) { + callbackWithErrorCode('retry loop', options, cb); + return; + } + // Remove default :443 and :80 from uri. uri = uri .replace(/^(http:\/\/[^\/]+):80(?!\d)/, '$1') @@ -1952,6 +1976,25 @@ return; } + // Find retry command. + var retry_data = findRetryError(result); + if (retry_data + && options.retriesCount + // Prerender not changed + && !!options.prerender === !!retry_data.prerender + // User agent not changed + && options.user_agent == retry_data.user_agent) { + log(' -- ignore recursive prerender retry'); + } else if (retry_data) { + abortCurrentRequest(); + var retryOptions = Object.assign({}, originalOptions, retry_data); + retryOptions.retriesCount = (retryOptions.retriesCount || 0) + 1; + retryOptions.refresh = true; + run(uri, retryOptions, cb); + aborted = true; + return; + } + // Gather results. // Run before `responseError` to collect data and send in error. var hasNewData = useResult(usedMethods, context, pluginsContexts, allResults, result, options, asyncMethodCb); diff --git a/lib/plugins/system/htmlparser/CollectingHandlerForMutliTarget.js b/lib/plugins/system/htmlparser/CollectingHandlerForMutliTarget.js index efe915b80..29b8c6ff4 100644 --- a/lib/plugins/system/htmlparser/CollectingHandlerForMutliTarget.js +++ b/lib/plugins/system/htmlparser/CollectingHandlerForMutliTarget.js @@ -7,6 +7,7 @@ export function CollectingHandlerForMutliTarget(cbsArray){ this._cbsArray = cbsArray || []; + this._virtualCount = 0; this.events = []; } @@ -35,11 +36,16 @@ EVENTS.forEach(function(name) { }; }); +CollectingHandlerForMutliTarget.prototype.hasNoHandlers = function() { + return this._cbsArray.length === 0 && this._virtualCount === 0; +}; + CollectingHandlerForMutliTarget.prototype.addHandler = function(cbs) { + const wasEmpty = this.hasNoHandlers(); this._cbsArray.push(cbs); this._emitEventsFor(cbs); - if (this._cbsArray.length === 1) { + if (wasEmpty) { // Got first handler, resume stream. this.onFirstHandler(); } @@ -52,7 +58,7 @@ CollectingHandlerForMutliTarget.prototype.removeHandler = function(cbs) { var idx = that._cbsArray.indexOf(cbs); if (idx > -1) { that._cbsArray.splice(idx, 1); - if (that._cbsArray.length === 0) { + if (that.hasNoHandlers()) { // No handlers, pause stream. that.onNoHandlers(); } @@ -60,6 +66,26 @@ CollectingHandlerForMutliTarget.prototype.removeHandler = function(cbs) { }); }; +CollectingHandlerForMutliTarget.prototype.addVirtualHandler = function() { + const wasEmpty = this.hasNoHandlers(); + this._virtualCount++; + if (wasEmpty) { + this.onFirstHandler(); + } +}; + +CollectingHandlerForMutliTarget.prototype.removeVirtualHandler = function() { + var that = this; + process.nextTick(function() { + if (that._virtualCount > 0) { + that._virtualCount--; + if (that.hasNoHandlers()) { + that.onNoHandlers(); + } + } + }); +}; + CollectingHandlerForMutliTarget.prototype.emitCb = function(event) { this.events.push(event); this.callCb(event); @@ -128,4 +154,4 @@ CollectingHandlerForMutliTarget.prototype._emitEventsFor = function(cbs) { } }; -export const notPlugin = true; \ No newline at end of file +export const notPlugin = true; diff --git a/lib/plugins/system/htmlparser/HtmlHandler.js b/lib/plugins/system/htmlparser/HtmlHandler.js new file mode 100644 index 000000000..8f33e9638 --- /dev/null +++ b/lib/plugins/system/htmlparser/HtmlHandler.js @@ -0,0 +1,39 @@ +export class HtmlHandler { + + constructor() { + this.text = ''; + this._onEndCallbacks = []; + this._ended = false; + } + + onData(chunk) { + // TODO: decode? + this.text += chunk; + } + + onEnd(callback) { + if (typeof callback === 'function') { + if (this._ended) { + callback(this.text); + } else { + this._onEndCallbacks.push(callback); + } + } + } + + end() { + this._ended = true; + for (const cb of this._onEndCallbacks) { + cb(this.text); + } + this._onEndCallbacks = []; + } + + attach(resp) { + resp.on('data', chunk => this.onData(chunk)); + resp.on('end', () => this.end()); + return this; + } +} + +export const notPlugin = true; diff --git a/lib/plugins/system/htmlparser/htmlparser.js b/lib/plugins/system/htmlparser/htmlparser.js index 0cedbf49a..df808c495 100644 --- a/lib/plugins/system/htmlparser/htmlparser.js +++ b/lib/plugins/system/htmlparser/htmlparser.js @@ -7,11 +7,13 @@ import * as metaUtils from '../meta/utils.js'; import { extendCookiesJar } from '../../../fetch.js'; var getUrlFunctional = utils.getUrlFunctional; import { CollectingHandlerForMutliTarget } from './CollectingHandlerForMutliTarget.js'; +import { HtmlHandler } from './HtmlHandler.js'; export default { provides: [ 'self', + 'htmlresponse', '__nonHtmlContentData', '__nonHtmlContentResponse', '__statusCode' @@ -201,11 +203,22 @@ export default { handler.abortController = abortController; handler.h2 = resp.h2; - // Do before resume? - cb(null, { + var result = { htmlparser: handler - }); + }; + + if (options.enableHtmlResponse) { + // Collect full response text. + var htmlHandler = new HtmlHandler(); + // Bind on data, on end. + htmlHandler.attach(resp); + result.htmlresponse = htmlHandler; + } + + // Do before resume? + cb(null, result); + // Proxy data. resp.on('data', parser.write.bind(parser)); resp.on('end', parser.end.bind(parser)); diff --git a/lib/plugins/system/meta/utils.js b/lib/plugins/system/meta/utils.js index 7187ab536..dff12cf89 100644 --- a/lib/plugins/system/meta/utils.js +++ b/lib/plugins/system/meta/utils.js @@ -3,7 +3,7 @@ export function getMetaCacheKey(url, whitelistRecord, options) { var meta_key = 'meta'; if (options.metaKeyPrefix) { - meta_key += ':' + options.metaKeyPrefix; + meta_key += ':' + options.metaKeyPrefix; } meta_key += ':' + url; diff --git a/lib/utils.js b/lib/utils.js index dd3d58741..361a7aa78 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -102,11 +102,15 @@ export function prepareRequestOptions(request_options, options) { if (typeof prerender_option === 'string') { request_options.uri += (request_options.uri.indexOf('?') > -1 ? '&' : '?' ) + `prerender=${prerender_option}`; } + + if (typeof options?.prerender?.overrideOptions === 'function') { + options.prerender.overrideOptions(request_options); + } } } // Some calls (like oembed) use basic options without `getDomainOptions`. - var enable_domain_prerender = options?.getDomainOptions && options.getDomainOptions('meta.prerender'); + var enable_domain_prerender = options?.getDomainOptions && options.getDomainOptions('meta.prerender') || options?.prerender; if (enable_domain_prerender) { setPrerender(enable_domain_prerender); } diff --git a/modules/api/views.js b/modules/api/views.js index c555d1136..89d6dfa23 100644 --- a/modules/api/views.js +++ b/modules/api/views.js @@ -140,6 +140,7 @@ export default function(app) { dataMode: getBooleanParam(req, 'dataMode'), forceParams: req.query.meta === "true" ? CONFIG.DEBUG_CONTEXTS : null, whitelist: getBooleanParam(req, 'whitelist'), + // TOOD: obsolete? readability: getBooleanParam(req, 'readability'), getWhitelistRecord: whitelist.findWhitelistRecordFor, maxWidth: getIntParam(req, 'maxwidth') || getIntParam(req, 'max-width'), @@ -158,9 +159,16 @@ export default function(app) { } if (result.safe_html) { + var readerJsParams = new url.URLSearchParams({ + uri: uri + }); + if (req.query.dataMode) { + readerJsParams.set('dataMode', req.query.dataMode); + } + cache.set('html:' + version + ':' + uri, result.safe_html); result.links.unshift({ - href: CONFIG.baseAppUrl + "/reader.js?uri=" + encodeURIComponent(uri), + href: CONFIG.baseAppUrl + '/reader.js?' + readerJsParams.toString(), type: CONFIG.T.javascript, rel: [CONFIG.R.reader, CONFIG.R.inline] }); @@ -280,7 +288,9 @@ export default function(app) { iframelyCore.run(uri, { v: '1.3', getWhitelistRecord: whitelist.findWhitelistRecordFor, - readability: true + // TOOD: obsolete? + readability: true, + dataMode: getBooleanParam(req, 'dataMode'), }, function(error, data) { if (!data || !data.safe_html) { diff --git a/plugins/links/prerender/prerender.js b/plugins/links/prerender/prerender.js deleted file mode 100644 index be4e591b5..000000000 --- a/plugins/links/prerender/prerender.js +++ /dev/null @@ -1,52 +0,0 @@ -import utils from './utils.js'; - -export default { - - highestPriority: true, - - provides: ['appUriData', 'whenPrerender'], - - getData: function(url, __allowJSRender, iframelyRun, options, meta, cb) { - - if (CONFIG.PRERENDER && CONFIG.PRERENDER_URL && options.user_agent === CONFIG.FB_USER_AGENT - && !url.startsWith(CONFIG.PRERENDER_URL)) { - - var prerenderUrl = CONFIG.PRERENDER_URL + encodeURIComponent(url); - var options2 = {...options, ...{ - debug: false, - refresh: true - }}; - - iframelyRun(prerenderUrl, options2, function(error, data) { - - var title = data && data.meta && ((data.meta.og && data.meta.og.title) || (data.meta.twitter && data.meta.twitter.title) || data.meta.title || data.meta['html-title']); - - if (data && data.meta && utils.maybeApp(data.meta)) { - return cb({ - responseStatusCode: 415 - }); - } else { - if (data.meta.canonical - && data.meta.canonical.startsWith(CONFIG.PRERENDER_URL) - ) { - delete data.meta.canonical; - } - return cb(error, { - appUriData: data, - whenPrerender: true - }); - } - }); - } else { - return cb(); - } - }, - - getMeta: function(appUriData, whenPrerender) { - return {...appUriData.meta}; - }, - - getLinks: function(appUriData, whenPrerender) { - return appUriData.links; - } -}; \ No newline at end of file diff --git a/plugins/links/prerender/react-app-fb-fallback.js b/plugins/links/prerender/react-app-fb-fallback.js deleted file mode 100644 index 35182ca4b..000000000 --- a/plugins/links/prerender/react-app-fb-fallback.js +++ /dev/null @@ -1,44 +0,0 @@ -import utils from './utils.js'; - -export default { - - highestPriority: true, - - provides: ['appUriData', 'whenReact'], - - getData: function(url, __allowJSRender, iframelyRun, options, cb) { - - if (options.user_agent === CONFIG.FB_USER_AGENT - || CONFIG.PRERENDER_URL && url.startsWith(CONFIG.PRERENDER_URL)) { - return cb(); - } - - var options2 = {...options, ...{ - debug: false, - refresh: true, - user_agent: CONFIG.FB_USER_AGENT - }}; - - iframelyRun(url, options2, function(error, data) { - - if (data && data.meta && utils.maybeApp(data.meta)) { - return cb({ - responseStatusCode: 415 - }); - } else { - return cb(error, { - appUriData: data, - whenReact: true - }); - } - }); - }, - - getMeta: function(appUriData, whenReact) { - return {...appUriData.meta} - }, - - getLinks: function(appUriData, whenReact) { - return appUriData.links; - } -}; \ No newline at end of file diff --git a/plugins/links/prerender/checkAppFlag.js b/plugins/links/prerender/retry-prerender.js similarity index 50% rename from plugins/links/prerender/checkAppFlag.js rename to plugins/links/prerender/retry-prerender.js index 58cc3aa30..26e956e51 100644 --- a/plugins/links/prerender/checkAppFlag.js +++ b/plugins/links/prerender/retry-prerender.js @@ -2,24 +2,21 @@ import utils from './utils.js'; export default { - provides: '__allowJSRender', + notPlugin: !CONFIG.PRERENDER_URL, - listed: true, + getData: function(meta, cb) { - getData: function(meta, url, options, cb) { + if (utils.maybeSPA(meta)) { - if (CONFIG.PRERENDER_URL && url.startsWith(CONFIG.PRERENDER_URL)) { - return cb(); - } - - if (utils.maybeApp(meta)) { - return cb(null, { - __allowJSRender: true, + return cb({ + retry: { + prerender: true + }, message: "This looks like JS app with no prerender. If you are the owner, please run templates on the server for Iframely robot." }); + } else { return cb(); } - } }; \ No newline at end of file diff --git a/plugins/links/prerender/utils.js b/plugins/links/prerender/utils.js index eca81159d..66ac0eac6 100644 --- a/plugins/links/prerender/utils.js +++ b/plugins/links/prerender/utils.js @@ -2,7 +2,7 @@ export default { notPlugin: true, - maybeApp: function(meta) { + maybeSPA: function(meta) { const title = (meta.og && meta.og.title) || (meta.twitter && meta.twitter.title) || meta.title || meta['html-title']; const maybeApp = diff --git a/views/readerjs.ejs b/views/readerjs.ejs index b22b2bfce..8a99c5d60 100644 --- a/views/readerjs.ejs +++ b/views/readerjs.ejs @@ -18,11 +18,11 @@ $cont.parent().trigger('iframely.loaded'); } - var $container = $('[data-used!=true][iframely-container-for$="' + encodeURIComponent(iframely.uri) + '"]:first'); + var $container = $('[data-used!=true][iframely-container-for*="' + encodeURIComponent(iframely.uri) + '"]:first'); if ($container.length == 0) { - var $script = $('script[data-used!=true][src$="' + encodeURIComponent(iframely.uri) + '"]:first'); + var $script = $('script[data-used!=true][src*="' + encodeURIComponent(iframely.uri) + '"]:first'); $script.attr('data-used', true); var $container = $('
');