diff --git a/src/tasks/opportunity-status-processor/handler.js b/src/tasks/opportunity-status-processor/handler.js index 179a80c..3fee45c 100644 --- a/src/tasks/opportunity-status-processor/handler.js +++ b/src/tasks/opportunity-status-processor/handler.js @@ -23,27 +23,44 @@ import { OPPORTUNITY_DEPENDENCY_MAP } from './opportunity-dependency-map.js'; const TASK_TYPE = 'opportunity-status-processor'; const AUDIT_WORKER_LOG_GROUP = '/aws/lambda/spacecat-services--audit-worker'; +/** + * Toggles the www subdomain in a hostname + * @param {string} hostname - The hostname to toggle + * @returns {string} The hostname with www toggled + */ +function toggleWWWHostname(hostname) { + return hostname.startsWith('www.') ? hostname.replace('www.', '') : `www.${hostname}`; +} + /** * Checks if RUM is available for a domain by attempting to get a domainkey + * Tries both with and without www prefix * @param {string} domain - The domain to check * @param {object} context - The context object with env and log * @returns {Promise} True if RUM is available, false otherwise */ async function isRUMAvailable(domain, context) { const { log } = context; + const rumClient = RUMAPIClient.createFrom(context); try { - const rumClient = RUMAPIClient.createFrom(context); - - // Attempt to get domainkey - if this succeeds, RUM is available await rumClient.retrieveDomainkey(domain); - log.info(`RUM is available for domain: ${domain}`); return true; } catch (error) { - log.info(`RUM is not available for domain: ${domain}. Reason: ${error.message}`); - return false; + log.warn(`RUM is not available for domain: ${domain}. Reason: ${error.message}`); } + + // Try with www-toggled domain + const wwwToggledDomain = toggleWWWHostname(domain); + try { + await rumClient.retrieveDomainkey(wwwToggledDomain); + log.info(`RUM is available for domain: ${wwwToggledDomain}`); + return true; + } catch (error) { + log.warn(`RUM not available for ${wwwToggledDomain}: ${error.message}`); + } + return false; } /** @@ -469,47 +486,55 @@ export async function runOpportunityStatusProcessor(message, context) { const needsScraping = requiredDependencies.has('scraping'); const needsGSC = requiredDependencies.has('GSC'); - // Only check data sources that are needed - if (siteUrl && (needsRUM || needsGSC || needsScraping)) { - try { - const resolvedUrl = await resolveCanonicalUrl(siteUrl); - log.info(`Resolved URL: ${resolvedUrl}`); - const domain = new URL(resolvedUrl).hostname; - - if (needsRUM) { - rumAvailable = await isRUMAvailable(domain, context); + if (!siteUrl) { + log.warn('No siteUrl provided, skipping RUM, GSC, and scraping checks'); + } else { + let resolvedUrl = null; + if (needsRUM || needsGSC) { + resolvedUrl = await resolveCanonicalUrl(siteUrl); + log.info(`Resolved URL: ${resolvedUrl || 'null'} for ${siteUrl}`); + + if (!resolvedUrl) { + log.warn(`Could not resolve canonical URL for: ${siteUrl}. Site may be unreachable.`); + if (slackContext) { + await say(env, log, slackContext, `:warning: Could not resolve canonical URL for \`${siteUrl}\`. Site may be unreachable.`); + } } + } - if (needsGSC) { - gscConfigured = await isGSCConfigured(resolvedUrl, context); - } + if (needsRUM) { + const urlToCheck = resolvedUrl || siteUrl; + const domain = new URL(urlToCheck).hostname; + rumAvailable = await isRUMAvailable(domain, context); + } - if (needsScraping) { - const scrapingCheck = await isScrapingAvailable(siteUrl, context); - scrapingAvailable = scrapingCheck.available; - - // Send Slack notification with scraping statistics if available - if (scrapingCheck.stats && slackContext) { - const { completed, failed, total } = scrapingCheck.stats; - const statsMessage = `:mag: *Scraping Statistics for ${siteUrl}*\n` - + `✅ Completed: ${completed}\n` - + `❌ Failed: ${failed}\n` - + `📊 Total: ${total}`; - - if (failed > 0) { - await say( - env, - log, - slackContext, - `${statsMessage}\n:information_source: _${failed} failed URLs will be retried on re-onboarding._`, - ); - } else { - await say(env, log, slackContext, statsMessage); - } + if (needsGSC && resolvedUrl) { + gscConfigured = await isGSCConfigured(resolvedUrl, context); + } + + if (needsScraping) { + const scrapingCheck = await isScrapingAvailable(siteUrl, context); + scrapingAvailable = scrapingCheck.available; + + // Send Slack notification with scraping statistics if available + if (scrapingCheck.stats && slackContext) { + const { completed, failed, total } = scrapingCheck.stats; + const statsMessage = `:mag: *Scraping Statistics for ${siteUrl}*\n` + + `✅ Completed: ${completed}\n` + + `❌ Failed: ${failed}\n` + + `📊 Total: ${total}`; + + if (failed > 0) { + await say( + env, + log, + slackContext, + `${statsMessage}\n:information_source: _${failed} failed URLs will be retried on re-onboarding._`, + ); + } else { + await say(env, log, slackContext, statsMessage); } } - } catch (error) { - log.warn(`Could not resolve canonical URL or parse siteUrl for data source checks: ${siteUrl}`, error); } } diff --git a/test/index.test.js b/test/index.test.js index 08f238f..7b44e00 100644 --- a/test/index.test.js +++ b/test/index.test.js @@ -203,5 +203,29 @@ describe('Index Tests', () => { expect(resp.status).to.equal(200); expect(directContext.log.info.calledWith(sinon.match(/Received message with type: dummy/))).to.be.true; }); + + it('detects wrapped SQS events with messageId in context.invocation.event.Records', async () => { + // Test case where event.Records is not present, but context.invocation.event.Records is + // This covers lines 111-112 in src/index.js + const wrappedSqsContext = { + ...context, + invocation: { + event: { + Records: [{ + messageId: 'test-message-id-123', + body: JSON.stringify({ + type: 'dummy', + siteId: 'wrapped-site', + }), + }], + }, + }, + }; + + // Pass an empty event object (no top-level Records) + const resp = await main({}, wrappedSqsContext); + expect(resp.status).to.equal(200); + expect(wrappedSqsContext.log.info.calledWith(sinon.match(/Received message with type: dummy/))).to.be.true; + }); }); }); diff --git a/test/tasks/opportunity-status-processor/opportunity-status-processor.test.js b/test/tasks/opportunity-status-processor/opportunity-status-processor.test.js index 20354b5..706faa8 100644 --- a/test/tasks/opportunity-status-processor/opportunity-status-processor.test.js +++ b/test/tasks/opportunity-status-processor/opportunity-status-processor.test.js @@ -44,11 +44,12 @@ describe('Opportunity Status Processor', () => { // Mock fetch for robots.txt and HEAD requests global.fetch = sandbox.stub(); - global.fetch.resolves({ + global.fetch.callsFake((url) => Promise.resolve({ ok: true, status: 200, + url, // Include URL for resolveCanonicalUrl text: sandbox.stub().resolves('User-agent: *\nAllow: /'), - }); + })); // Mock context context = new MockContextBuilder() @@ -268,9 +269,9 @@ describe('Opportunity Status Processor', () => { mockSite.getOpportunities.resolves(mockOpportunities); // For this test, we'll just verify that the error is handled gracefully - // The actual resolveCanonicalUrl function will throw an error for invalid URLs + // resolveCanonicalUrl returns null for invalid URLs, triggering log.warn await runOpportunityStatusProcessor(message, context); - expect(context.log.warn.calledWith('Could not resolve canonical URL or parse siteUrl for data source checks: invalid-url', sinon.match.any)).to.be.true; + expect(context.log.warn.calledWith(sinon.match(/Could not resolve canonical URL for: invalid-url/))).to.be.true; expect(mockSite.getOpportunities.called).to.be.true; }); @@ -348,6 +349,7 @@ describe('Opportunity Status Processor', () => { info: sinon.stub(), error: sinon.stub(), warn: sinon.stub(), + debug: sinon.stub(), }, env: { RUM_ADMIN_KEY: 'test-admin-key', @@ -399,13 +401,13 @@ describe('Opportunity Status Processor', () => { await runOpportunityStatusProcessor(testMessage, testContext); - // Verify error handling for localhost URLs - expect(testContext.log.warn.calledWith(`Could not resolve canonical URL or parse siteUrl for data source checks: ${testCase.url}`, sinon.match.any)).to.be.true; + // Verify error handling for localhost URLs (now uses log.warn for null resolvedUrl) + expect(testContext.log.warn.calledWith(sinon.match(/Could not resolve canonical URL for.*Site may be unreachable/))).to.be.true; })); }); it('should handle RUM success scenarios', async () => { - // Test RUM available (success case) - use a simple URL that should resolve quickly + // Test RUM available (success case) - tries original domain first mockRUMClient.retrieveDomainkey.resolves('test-domain-key'); const RUMAPIClient = await import('@adobe/spacecat-shared-rum-api-client'); const createFromStub = sinon.stub(RUMAPIClient.default, 'createFrom').returns(mockRUMClient); @@ -420,30 +422,138 @@ describe('Opportunity Status Processor', () => { }, }; + const testMockSite = { + getOpportunities: sinon.stub().resolves([]), + }; + const testContext = { ...mockContext, dataAccess: { Site: { - findById: sinon.stub().resolves({ - getOpportunities: sinon.stub().resolves([]), - }), + findById: sinon.stub().resolves(testMockSite), }, SiteTopPage: { allBySiteIdAndSourceAndGeo: sinon.stub().resolves([]), }, + Configuration: { + findLatest: sinon.stub().resolves({}), + }, }, }; await runOpportunityStatusProcessor(testMessage, testContext); - // Verify RUM was checked successfully - this should cover lines 26-37 + // Verify RUM was checked successfully - now tries original domain first expect(createFromStub.calledWith(testContext)).to.be.true; - expect(mockRUMClient.retrieveDomainkey.calledWith('example.com')).to.be.true; + expect(mockRUMClient.retrieveDomainkey.callCount).to.be.at.least(1); + expect(mockRUMClient.retrieveDomainkey.getCall(0).args[0]).to.equal('example.com'); expect(testContext.log.info.calledWith('RUM is available for domain: example.com')).to.be.true; createFromStub.restore(); }); + it('should try toggled domain when original domain fails', async () => { + // Test fallback to toggled domain when original fails + const mockRUMClientWithFallback = { + retrieveDomainkey: sinon.stub(), + }; + // First call (example.com) fails, second call (www.example.com) succeeds + mockRUMClientWithFallback.retrieveDomainkey + .onFirstCall().rejects(new Error('404 Not Found')) + .onSecondCall().resolves('test-domain-key'); + + const RUMAPIClient = await import('@adobe/spacecat-shared-rum-api-client'); + const createFromStub = sinon.stub(RUMAPIClient.default, 'createFrom').returns(mockRUMClientWithFallback); + + const testMessage = { + siteId: 'test-site-id', + siteUrl: 'https://example.com', + organizationId: 'test-org-id', + taskContext: { + auditTypes: ['cwv'], + slackContext: null, + }, + }; + + const testMockSite = { + getOpportunities: sinon.stub().resolves([]), + }; + + const testContext = { + ...mockContext, + dataAccess: { + Site: { + findById: sinon.stub().resolves(testMockSite), + }, + SiteTopPage: { + allBySiteIdAndSourceAndGeo: sinon.stub().resolves([]), + }, + Configuration: { + findLatest: sinon.stub().resolves({}), + }, + }, + }; + + await runOpportunityStatusProcessor(testMessage, testContext); + + // Verify both attempts were made - original first, then toggled + expect(mockRUMClientWithFallback.retrieveDomainkey.callCount).to.equal(2); + expect(mockRUMClientWithFallback.retrieveDomainkey.getCall(0).args[0]).to.equal('example.com'); + expect(mockRUMClientWithFallback.retrieveDomainkey.getCall(1).args[0]).to.equal('www.example.com'); + expect(testContext.log.info.calledWith('RUM is available for domain: www.example.com')).to.be.true; + + createFromStub.restore(); + }); + + it('should handle both domain variations failing', async () => { + // Test when both original and toggled domain fail + const mockRUMClientFailBoth = { + retrieveDomainkey: sinon.stub().rejects(new Error('404 Not Found')), + }; + + const RUMAPIClient = await import('@adobe/spacecat-shared-rum-api-client'); + const createFromStub = sinon.stub(RUMAPIClient.default, 'createFrom').returns(mockRUMClientFailBoth); + + const testMessage = { + siteId: 'test-site-id', + siteUrl: 'https://example.com', + organizationId: 'test-org-id', + taskContext: { + auditTypes: ['cwv'], + slackContext: null, + }, + }; + + const testMockSite = { + getOpportunities: sinon.stub().resolves([]), + }; + + const testContext = { + ...mockContext, + dataAccess: { + Site: { + findById: sinon.stub().resolves(testMockSite), + }, + SiteTopPage: { + allBySiteIdAndSourceAndGeo: sinon.stub().resolves([]), + }, + Configuration: { + findLatest: sinon.stub().resolves({}), + }, + }, + }; + + await runOpportunityStatusProcessor(testMessage, testContext); + + // Verify both attempts were made - original first, then toggled + expect(mockRUMClientFailBoth.retrieveDomainkey.callCount).to.equal(2); + expect(mockRUMClientFailBoth.retrieveDomainkey.getCall(0).args[0]).to.equal('example.com'); + expect(mockRUMClientFailBoth.retrieveDomainkey.getCall(1).args[0]).to.equal('www.example.com'); + expect(testContext.log.warn.calledWith(sinon.match(/RUM not available for www.example.com/))).to.be.true; + + createFromStub.restore(); + }); + it('should handle opportunities with different types and localhost URLs', async () => { // Test opportunities with different types when using localhost URLs const testCases = [ @@ -1028,7 +1138,7 @@ describe('Opportunity Status Processor', () => { await runOpportunityStatusProcessor(message, context); - expect(context.log.warn.calledWithMatch('Could not resolve canonical URL')).to.be.true; + expect(context.log.warn.calledWithMatch('Could not resolve canonical URL for: not-a-valid-url')).to.be.true; }); it('should handle opportunities with missing getData method', async () => {