diff --git a/.changeset/par-expiry-bridge-and-callback-leak.md b/.changeset/par-expiry-bridge-and-callback-leak.md new file mode 100644 index 00000000..1686f6e8 --- /dev/null +++ b/.changeset/par-expiry-bridge-and-callback-leak.md @@ -0,0 +1,9 @@ +--- +'ePDS': patch +--- + +Sign-in no longer fails with a raw JSON error page when a user takes too long on the OTP step. + +**Affects:** End users + +**End users:** Previously, if you took more than five minutes between requesting your one-time code and submitting it (a slow inbox, switching tabs, fishing the code out of spam, multiple Resend cycles), sign-in could fail with a blank page showing only `{"error": "Authentication failed"}` on the PDS host — even though your OTP code itself was still valid. You now either land back inside the app you were signing into (which can offer a one-click retry), or see a styled error page on the PDS host explaining that sign-in timed out — depending on how far through the flow the timeout is detected. Either way, no more raw JSON. diff --git a/e2e/cucumber.mjs b/e2e/cucumber.mjs index 5f0ff0d6..9498d701 100644 --- a/e2e/cucumber.mjs +++ b/e2e/cucumber.mjs @@ -23,11 +23,16 @@ // environments that provide a second demo — reflect that in the tag // expression by excluding the tag when E2E_DEMO_UNTRUSTED_URL is unset. // -// Scenarios tagged @otp-expiry call the auth-service /_internal/test/* -// hooks (which require EPDS_TEST_HOOKS=1 on the server side and the -// matching EPDS_INTERNAL_SECRET on the client side). Exclude them when -// E2E_INTERNAL_SECRET is unset so they don't fail at run time on -// environments that haven't enabled the hook. +// Scenarios tagged @otp-expiry or @par-callback-error call +// /_internal/test/* hooks (auth-service for @otp-expiry, pds-core for +// @par-callback-error) which require EPDS_TEST_HOOKS=1 on the server +// side and the matching EPDS_INTERNAL_SECRET on the client side. +// Exclude both when E2E_INTERNAL_SECRET is unset so they don't fail +// at run time on environments that haven't enabled the hooks. +const hookTagExclusions = process.env.E2E_INTERNAL_SECRET + ? [] + : ['not @otp-expiry', 'not @par-callback-error'] + const defaultTagExclusions = [ 'not @manual', 'not @docker-only', @@ -35,7 +40,7 @@ const defaultTagExclusions = [ 'not @risk-of-disruption', 'not @session-reuse', ...(process.env.E2E_DEMO_UNTRUSTED_URL ? [] : ['not @untrusted-client']), - ...(process.env.E2E_INTERNAL_SECRET ? [] : ['not @otp-expiry']), + ...hookTagExclusions, ] const shared = { diff --git a/e2e/step-definitions/auth.steps.ts b/e2e/step-definitions/auth.steps.ts index 856d7c88..2d0a3bb4 100644 --- a/e2e/step-definitions/auth.steps.ts +++ b/e2e/step-definitions/auth.steps.ts @@ -517,3 +517,122 @@ Then('the login page renders normally', async function (this: EpdsWorld) { Then('the OTP flow still works to completion', function (this: EpdsWorld) { return this.skipIfNoMailpit() }) + +// --------------------------------------------------------------------------- +// PAR (request_uri) expiry scenario +// --------------------------------------------------------------------------- +// +// The PAR record lives in pds-core's @atproto/oauth-provider store +// (authorization_request table) and is independent of the auth-service +// auth_flow row. Upstream hardcodes PAR_EXPIRES_IN = 5 min, so a user +// who takes >5 min between requesting and submitting the OTP (slow +// inbox, switching tabs, multiple Resend cycles) trips +// "AccessDeniedError: This request has expired" inside +// /oauth/epds-callback even though all auth-service-side state is +// healthy. PAR expiry is genuine: once expired, the row cannot be +// revived — RequestManager.get() throws AND deletes the row in the +// same call, so any "ping" mechanism is too late. +// +// The fix is to honour RFC 6749 §4.1.2.1 and surface the failure as +// a redirect back to the client's redirect_uri with error, +// error_description, iss, and state query params. To reproduce +// without a 5-minute wall-clock wait, a pds-core test-only hook +// (mounted iff EPDS_TEST_HOOKS=1, double-gated by +// EPDS_INTERNAL_SECRET and a NODE_ENV=production refusal) deletes +// the PAR row directly: +// +// POST /_internal/test/delete-par { request_uri } + +async function callPdsExpiryHook( + hookPath: string, + requestUri: string, +): Promise { + const url = `${testEnv.pdsUrl}${hookPath}` + const res = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-internal-secret': testEnv.internalSecret, + }, + body: JSON.stringify({ request_uri: requestUri }), + signal: AbortSignal.timeout(10_000), + }) + if (!res.ok) { + const errBody = await res.text().catch(() => '') + throw new Error( + `${hookPath} hook failed: ${res.status} ${res.statusText}: ${errBody}`, + ) + } +} + +/** + * Read the PAR request_uri from the current auth-service login page + * URL and stash it on the world for subsequent expiry hooks. The URL + * is `https:///oauth/authorize?request_uri=urn:...&...` while the + * user is on the login/OTP form. Throws if the URL doesn't carry it, + * which means the surrounding scenario is mis-ordered (the page must + * be on the auth-service side before this step runs). + */ +function captureRequestUriFromPage(world: EpdsWorld): string { + const page = getPage(world) + const requestUri = new URL(page.url()).searchParams.get('request_uri') + if (!requestUri) { + throw new Error( + `Expected request_uri in page URL but found none: ${page.url()}`, + ) + } + world.lastRequestUri = requestUri + return requestUri +} + +When( + 'the PAR request_uri has expired before the bridge fires', + async function (this: EpdsWorld) { + if (!testEnv.mailpitPass) return 'pending' + if (!testEnv.internalSecret) return 'pending' + const requestUri = captureRequestUriFromPage(this) + await callPdsExpiryHook('/_internal/test/delete-par', requestUri) + }, +) + +Then('the response body is not raw JSON', async function (this: EpdsWorld) { + const page = getPage(this) + // The OTP form's submit is JS-driven and async, and Playwright's + // fill() returns before the bridge redirects. Wait for the + // browser to actually leave the auth-service host and arrive at + // pds-core's epds-callback (where, in this scenario, the + // catch-block renders the error page). Without this wait we'd + // read the still-rendering OTP form's body. + const pdsHost = new URL(testEnv.pdsUrl).host + await page.waitForURL( + (url) => url.host === pdsHost && url.pathname.includes('epds-callback'), + { timeout: 30_000 }, + ) + const body = await page.locator('body').innerText() + // The pre-fix behaviour returned a body that started with + // {"error": "Authentication failed"}. A graceful HTML page won't — + // its

/

text isn't valid JSON. The regex catches any + // {"error": ...} shape so a future leak of a different JSON + // payload is still caught. + if (/^\s*\{\s*"error"/.test(body)) { + throw new Error( + `pds-core leaked raw JSON to the browser: ${body.slice(0, 200)}`, + ) + } +}) + +Then( + 'the response body explains that sign-in timed out', + async function (this: EpdsWorld) { + const page = getPage(this) + const body = await page.locator('body').innerText() + // Don't pin exact wording — just require something that mentions + // the timeout / expiry so a human reading it understands why + // their sign-in failed. + if (!/expir|timed? ?out|too long/i.test(body)) { + throw new Error( + `Error page should mention the timeout but said: "${body.slice(0, 500)}"`, + ) + } + }, +) diff --git a/features/passwordless-authentication.feature b/features/passwordless-authentication.feature index 995f74e6..e5a9a589 100644 --- a/features/passwordless-authentication.feature +++ b/features/passwordless-authentication.feature @@ -272,6 +272,72 @@ Feature: Passwordless authentication via email OTP Then the browser is redirected back to the demo client And the demo client has a valid OAuth access token + # --- PAR (request_uri) expiry --- + # + # The PAR ("Pushed Authorization Request") record lives in pds-core's + # @atproto/oauth-provider store. Upstream hardcodes: + # * PAR_EXPIRES_IN = 5 minutes (initial TTL on PAR creation), and + # * AUTHORIZATION_INACTIVITY_TIMEOUT = 5 minutes (sliding window + # reset on every requestManager.get()). + # If the user takes >5 minutes between requesting an OTP and + # submitting it (slow inbox, switching tabs, fishing the code out + # of spam, multiple Resend cycles), the PAR row is gone by the time + # auth-service's /auth/complete redirects to /oauth/epds-callback. + # PAR expiry is genuine: once expired, the row cannot be revived — + # @atproto/oauth-provider's RequestManager.get() throws + # AccessDeniedError AND deletes the row in the same call. + # + # The bug a real user hit: the resulting AccessDeniedError was + # caught inside /oauth/epds-callback and surfaced as a raw JSON + # response body of {"error": "Authentication failed"} on the PDS + # host, with no client redirect and no styled page. The fix is to + # honour RFC 6749 §4.1.2.1: redirect the user back to the OAuth + # client's redirect_uri with error=access_denied, + # error_description, iss, and state query parameters, so the + # client can offer "Sign-in expired, try again". When no + # redirect_uri is recoverable from the dead PAR row, fall back to + # a styled HTML page on the PDS host instead of raw JSON. + # + # access_denied is the same error code @atproto/oauth-provider uses + # for PAR expiry on its own paths (e.g. when a user is bounced + # through /oauth/authorize with an expired request_uri). We follow + # that precedent for consistency, even though "denied" is mildly + # misleading — the error_description carries the real user-friendly + # explanation. + # + # Wall-clock waiting 5 minutes is unacceptable for an e2e scenario, + # so a test-only pds-core hook (POST /_internal/test/delete-par, + # gated by EPDS_TEST_HOOKS=1 + EPDS_INTERNAL_SECRET, and refused + # under NODE_ENV=production) deletes the PAR row directly. The + # auth_flow row, OTP, and cookie are all left alive so the failure + # isolates to the PAR layer — exactly what a slow user trips in + # production. + + # The test deletes the PAR row before the callback fires, mirroring + # the production failure where /oauth/epds-callback's first + # requestManager.get() (Step 2 in the handler) throws + # InvalidRequestError("Unknown request_uri"). With no live PAR row + # there is no redirect_uri to recover, so the user must land on a + # styled HTML page on the PDS host — not the previous raw JSON + # body — explaining that the sign-in timed out. + # + # The redirect-back-to-client path (driven by the redirect_uri / + # state captured during a successful Step 2) is exercised by the + # other paths inside the same handler that throw later in the + # flow; we don't have an easy e2e harness to inject a mid-flow + # failure today. + @email @par-callback-error + Scenario: Expired PAR shows a styled HTML page instead of leaking JSON + Given a returning user has a PDS account + When the demo client initiates an OAuth login + And the user enters the test email on the login page + Then an OTP email arrives in the mail trap + And the login page shows an OTP verification form + When the PAR request_uri has expired before the bridge fires + And the user enters the OTP code + Then the response body is not raw JSON + And the response body explains that sign-in timed out + # --- Brute force protection --- Scenario: OTP verification rejects wrong code diff --git a/packages/auth-service/src/__tests__/test-hooks.test.ts b/packages/auth-service/src/__tests__/test-hooks.test.ts index 2155e7e6..42a437a4 100644 --- a/packages/auth-service/src/__tests__/test-hooks.test.ts +++ b/packages/auth-service/src/__tests__/test-hooks.test.ts @@ -5,6 +5,7 @@ import * as os from 'node:os' import * as path from 'node:path' import { randomUUID } from 'node:crypto' import Database from 'better-sqlite3' +import { postHook } from '@certified-app/shared' import { createTestHooksRouter } from '../routes/test-hooks.js' function readExpiresAt(dbPath: string, identifier: string): string | undefined { @@ -44,42 +45,6 @@ function seedVerification( } } -async function postHook( - app: express.Express, - hookPath: string, - body: Record, - headers: Record = {}, -): Promise<{ status: number; json: Record }> { - const server = app.listen(0) - const port = await new Promise((resolve, reject) => { - server.once('error', reject) - server.once('listening', () => { - const addr = server.address() - if (typeof addr === 'object' && addr) { - resolve(addr.port) - } else { - reject(new Error('Failed to resolve ephemeral port')) - } - }) - }) - server.unref() - try { - const res = await fetch(`http://127.0.0.1:${port}${hookPath}`, { - method: 'POST', - headers: { 'Content-Type': 'application/json', ...headers }, - body: JSON.stringify(body), - }) - const json = (await res.json().catch(() => ({}))) as Record - return { status: res.status, json } - } finally { - await new Promise((resolve) => { - server.close(() => { - resolve() - }) - }) - } -} - async function postExpire( app: express.Express, body: Record, diff --git a/packages/auth-service/src/lib/render-error.ts b/packages/auth-service/src/lib/render-error.ts index bc454deb..8196d8b6 100644 --- a/packages/auth-service/src/lib/render-error.ts +++ b/packages/auth-service/src/lib/render-error.ts @@ -1,35 +1,17 @@ -import { escapeHtml } from '@certified-app/shared' +import { renderError as renderSharedError } from '@certified-app/shared' import { POWERED_BY_CSS, POWERED_BY_HTML } from './page-helpers.js' -const ERROR_CSS = ` - * { box-sizing: border-box; margin: 0; padding: 0; } - body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f5f5f5; min-height: 100vh; display: flex; align-items: center; justify-content: center; padding: 20px; } - .page-wrap { display: flex; flex-direction: column; align-items: stretch; max-width: 420px; width: 100%; } - ${POWERED_BY_CSS} - .container { background: white; border-radius: 12px; padding: 40px; width: 100%; box-shadow: 0 2px 8px rgba(0,0,0,0.08); text-align: center; } - h1 { font-size: 24px; margin-bottom: 16px; color: #111; } - .error { color: #dc3545; background: #fdf0f0; padding: 12px; border-radius: 8px; font-size: 15px; line-height: 1.5; } -` - +/** + * Auth-service's error page reuses `@certified-app/shared`'s styled + * `renderError` and adds the auth-service-specific "Powered by + * Certified" footer below the error card. Pds-core uses the shared + * version directly (no footer) — the brand promo only belongs on + * sign-in-flow surfaces. + */ export function renderError(message: string, title = 'Error'): string { - return ` - - - - - - - ${escapeHtml(title)} - - - -

-
-

${escapeHtml(title)}

-

${escapeHtml(message)}

-
- ${POWERED_BY_HTML} -
- -` + return renderSharedError(message, { + title, + extraCss: POWERED_BY_CSS, + bodyExtra: POWERED_BY_HTML, + }) } diff --git a/packages/pds-core/src/__tests__/epds-callback-error.test.ts b/packages/pds-core/src/__tests__/epds-callback-error.test.ts new file mode 100644 index 00000000..e8c4fa07 --- /dev/null +++ b/packages/pds-core/src/__tests__/epds-callback-error.test.ts @@ -0,0 +1,338 @@ +import { describe, expect, it, vi } from 'vitest' +import type { Response } from 'express' +import { + EXPIRED_PAR_MESSAGE_PATTERN, + classifyCallbackError, + handleCallbackError, +} from '../lib/epds-callback-error.js' + +const PDS_URL = 'https://pds.example' +const REDIRECT_URI = 'https://demo.example/api/oauth/callback' +const STATE = 'XNMi-ebr4JAUAEWa-52HEA' + +const TIMEOUT_DESCRIPTION = + 'Your sign-in took too long to complete and timed out. Please start sign-in again.' +const SERVER_DESCRIPTION = 'Authentication failed.' + +/** Build a minimal Response double that records the calls + * handleCallbackError makes on it. Untouched by middleware, so reset + * per test. */ +function makeResStub() { + let headersSent = false + const headers: Record = {} + let statusCode: number | undefined + let contentType: string | undefined + let body: string | undefined + let redirectStatus: number | undefined + let redirectLocation: string | undefined + + const res = { + get headersSent() { + return headersSent + }, + setHeader(name: string, value: string) { + headers[name.toLowerCase()] = value + return res + }, + status(code: number) { + statusCode = code + return res + }, + type(t: string) { + contentType = t + return res + }, + send(s: string) { + body = s + headersSent = true + return res + }, + redirect(status: number, location: string) { + redirectStatus = status + redirectLocation = location + headersSent = true + }, + /** Pretend an earlier middleware already sent something — drives + * the no-op branch. */ + forceHeadersSent() { + headersSent = true + }, + } as unknown as Response & { forceHeadersSent: () => void } + + return { + res, + inspect: () => ({ + headers, + statusCode, + contentType, + body, + redirectStatus, + redirectLocation, + }), + } +} + +describe('EXPIRED_PAR_MESSAGE_PATTERN', () => { + it.each([ + 'This request has expired', + 'Unknown request_uri', + 'invalid_grant', + 'something has expired', + // Case-insensitive + 'INVALID_GRANT', + ])('matches dead-PAR message: %s', (msg) => { + expect(EXPIRED_PAR_MESSAGE_PATTERN.test(msg)).toBe(true) + }) + + it.each([ + 'Database connection refused', + 'Account not found', + 'Internal server error', + '', + ])('does not match unrelated message: %s', (msg) => { + expect(EXPIRED_PAR_MESSAGE_PATTERN.test(msg)).toBe(false) + }) +}) + +describe('classifyCallbackError', () => { + it.each([ + { + label: 'AccessDeniedError("This request has expired")', + message: 'This request has expired', + }, + { + label: 'InvalidRequestError("Unknown request_uri")', + message: 'Unknown request_uri', + }, + { label: 'invalid_grant', message: 'invalid_grant' }, + ])('classifies $label as expired', ({ message }) => { + expect(classifyCallbackError(new Error(message))).toEqual({ + code: 'access_denied', + description: TIMEOUT_DESCRIPTION, + isExpired: true, + }) + }) + + it('classifies a generic failure as server_error', () => { + const err = new Error('Database connection refused') + expect(classifyCallbackError(err)).toEqual({ + code: 'server_error', + description: SERVER_DESCRIPTION, + isExpired: false, + }) + }) + + it('handles non-Error thrown values by stringifying them', () => { + expect(classifyCallbackError('something has expired')).toEqual({ + code: 'access_denied', + description: TIMEOUT_DESCRIPTION, + isExpired: true, + }) + expect(classifyCallbackError({ code: 'oops' })).toEqual({ + code: 'server_error', + description: SERVER_DESCRIPTION, + isExpired: false, + }) + }) +}) + +/** Shared driver for handleCallbackError tests. Lifts the response + * stub + spies + the seven-field options object into a single call so + * individual tests only have to spell out what's varying (the error, + * the captured redirect_uri/state, optionally a forceHeadersSent + * override). Returns the inspect snapshot plus the spies, so tests + * can assert on response state, the renderError contract, and the + * logger contract from one call site. */ +function invoke(opts: { + err: unknown + capturedRedirectUri?: string + capturedState?: string + forceHeadersSent?: boolean +}) { + const { res, inspect } = makeResStub() + if (opts.forceHeadersSent) { + ;(res as Response & { forceHeadersSent: () => void }).forceHeadersSent() + } + const renderError = vi.fn((m: string) => `${m}`) + const logger = { error: vi.fn(), warn: vi.fn() } + handleCallbackError({ + res, + err: opts.err, + capturedRedirectUri: opts.capturedRedirectUri, + capturedState: opts.capturedState, + pdsUrl: PDS_URL, + logger, + renderError, + }) + return { ...inspect(), renderError, logger } +} + +describe('handleCallbackError — redirect path', () => { + it('redirects with error=access_denied + timeout description on expired PAR', () => { + const got = invoke({ + err: new Error('This request has expired'), + capturedRedirectUri: REDIRECT_URI, + capturedState: STATE, + }) + expect(got.renderError).not.toHaveBeenCalled() + const url = new URL(got.redirectLocation!) + expect(url.origin + url.pathname).toBe(REDIRECT_URI) + expect(url.searchParams.get('error')).toBe('access_denied') + expect(url.searchParams.get('error_description')).toBe(TIMEOUT_DESCRIPTION) + expect(url.searchParams.get('iss')).toBe(PDS_URL) + expect(url.searchParams.get('state')).toBe(STATE) + }) + + it('redirects with error=server_error on unrelated failures', () => { + const got = invoke({ + err: new Error('Database connection refused'), + capturedRedirectUri: REDIRECT_URI, + capturedState: STATE, + }) + const url = new URL(got.redirectLocation!) + expect(url.searchParams.get('error')).toBe('server_error') + expect(url.searchParams.get('error_description')).toBe(SERVER_DESCRIPTION) + expect(url.searchParams.get('iss')).toBe(PDS_URL) + expect(url.searchParams.get('state')).toBe(STATE) + }) + + it('omits state when none was captured', () => { + const got = invoke({ + err: new Error('This request has expired'), + capturedRedirectUri: REDIRECT_URI, + }) + const url = new URL(got.redirectLocation!) + expect(url.searchParams.has('state')).toBe(false) + }) + + it('issues a 303 See Other so OAuth clients re-fetch with GET', () => { + const got = invoke({ + err: new Error('This request has expired'), + capturedRedirectUri: REDIRECT_URI, + capturedState: STATE, + }) + expect(got.redirectStatus).toBe(303) + }) + + it('marks the redirect non-cacheable so per-request state cannot be replayed', () => { + const got = invoke({ + err: new Error('This request has expired'), + capturedRedirectUri: REDIRECT_URI, + capturedState: STATE, + }) + expect(got.headers['cache-control']).toBe('no-store') + }) +}) + +describe('handleCallbackError — malformed captured redirect_uri', () => { + // The redirect_uri is captured from an upstream-validated PAR row, + // so this branch is defensive — but the catch block exists to spare + // the user a 500, and a `new URL()` throw inside it would defeat + // that purpose. Verify the malformed-URL path falls through to the + // styled HTML page and logs the URL parse failure. + it.each([ + 'not a url at all', + '://broken', + '/relative/only', + 'https://[malformed-bracket', + ])('falls back to HTML when capturedRedirectUri is %s', (badUri) => { + const got = invoke({ + err: new Error('This request has expired'), + capturedRedirectUri: badUri, + capturedState: STATE, + }) + // No redirect emitted. + expect(got.redirectLocation).toBeUndefined() + // HTML page served instead. + expect(got.statusCode).toBe(400) + expect(got.contentType).toBe('html') + expect(got.renderError).toHaveBeenCalledWith(TIMEOUT_DESCRIPTION) + // The URL parse failure must be visible in operational logs. + expect(got.logger.error).toHaveBeenCalledWith( + expect.objectContaining({ capturedRedirectUri: badUri }), + expect.stringContaining('not a valid URL'), + ) + }) +}) + +describe('handleCallbackError — HTML fallback path', () => { + it('renders a styled HTML page when no redirect_uri was captured (expired)', () => { + const got = invoke({ err: new Error('This request has expired') }) + expect(got.statusCode).toBe(400) + expect(got.contentType).toBe('html') + expect(got.renderError).toHaveBeenCalledWith(TIMEOUT_DESCRIPTION) + expect(got.body).toContain(TIMEOUT_DESCRIPTION) + expect(got.redirectLocation).toBeUndefined() + }) + + it('marks the HTML response non-cacheable', () => { + const got = invoke({ err: new Error('This request has expired') }) + expect(got.headers['cache-control']).toBe('no-store') + }) + + it('renders a 500 HTML page on generic server failure with no redirect_uri', () => { + const got = invoke({ err: new Error('Database connection refused') }) + expect(got.statusCode).toBe(500) + expect(got.renderError).toHaveBeenCalledWith(SERVER_DESCRIPTION) + }) + + it('does NOT leak raw JSON {"error":"Authentication failed"}', () => { + // Regression guard against the pre-fix behaviour. The body must + // not parse as the legacy JSON error shape on either status path. + for (const err of [ + new Error('This request has expired'), + new Error('Database connection refused'), + ]) { + const got = invoke({ err }) + const body = got.body ?? '' + expect(body.startsWith('{')).toBe(false) + expect(body).not.toMatch(/^\s*\{\s*"error"/) + } + }) +}) + +describe('handleCallbackError — already-responded short-circuit', () => { + it('does nothing when headers were already sent', () => { + const got = invoke({ + err: new Error('This request has expired'), + capturedRedirectUri: REDIRECT_URI, + capturedState: STATE, + forceHeadersSent: true, + }) + expect(got.redirectLocation).toBeUndefined() + expect(got.body).toBeUndefined() + expect(got.renderError).not.toHaveBeenCalled() + }) +}) + +describe('handleCallbackError — log levels', () => { + // Expired PARs are an expected user-paced timeout, not a server + // fault. They should land at warn so they stay in operational logs + // but don't trigger error-level alerting once expiry becomes + // routine in production. + it('logs an expired-PAR failure at warn (not error)', () => { + const got = invoke({ + err: new Error('This request has expired'), + capturedRedirectUri: REDIRECT_URI, + capturedState: STATE, + }) + expect(got.logger.warn).toHaveBeenCalledWith( + expect.objectContaining({ err: expect.any(Error) }), + expect.stringContaining('timed out'), + ) + expect(got.logger.error).not.toHaveBeenCalled() + }) + + it('logs a generic server failure at error (not warn)', () => { + const got = invoke({ + err: new Error('Database connection refused'), + capturedRedirectUri: REDIRECT_URI, + capturedState: STATE, + }) + expect(got.logger.error).toHaveBeenCalledWith( + expect.objectContaining({ err: expect.any(Error) }), + 'ePDS callback error', + ) + expect(got.logger.warn).not.toHaveBeenCalled() + }) +}) diff --git a/packages/pds-core/src/__tests__/test-hooks.test.ts b/packages/pds-core/src/__tests__/test-hooks.test.ts index 09ac7902..9e5abeea 100644 --- a/packages/pds-core/src/__tests__/test-hooks.test.ts +++ b/packages/pds-core/src/__tests__/test-hooks.test.ts @@ -1,5 +1,6 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import express from 'express' +import { postHook } from '@certified-app/shared' import { installTestHooks } from '../lib/test-hooks.js' // Minimal Kysely-like update query mock: chains `.set().where()*.executeTakeFirst()` @@ -33,9 +34,36 @@ function makeFakeUpdate() { } } +// Sibling shape for the delete-par chain: `.where().executeTakeFirst()`, +// returning {numDeletedRows}. Same recording style. +function makeFakeDelete() { + const wheres: Array<[string, string, unknown]> = [] + let result: { numDeletedRows: number | bigint } = { numDeletedRows: 0 } + const chain = { + where(col: string, op: string, val: unknown) { + wheres.push([col, op, val]) + return chain + }, + executeTakeFirst() { + return Promise.resolve(result) + }, + } + return { + chain, + setDeletedRows(n: number | bigint) { + result = { numDeletedRows: n } + }, + inspect() { + return { wheres } + }, + } +} + function makeFakePds(opts: { - fakeUpdate: ReturnType + fakeUpdate?: ReturnType + fakeDelete?: ReturnType failOnExecute?: boolean + failOnDelete?: boolean }) { return { ctx: { @@ -54,7 +82,19 @@ function makeFakePds(opts: { }), } } - return opts.fakeUpdate.chain + return opts.fakeUpdate?.chain + }, + deleteFrom: (table: string) => { + expect(table).toBe('authorization_request') + if (opts.failOnDelete) { + return { + where: () => ({ + executeTakeFirst: () => + Promise.reject(new Error('db down')), + }), + } + } + return opts.fakeDelete?.chain }, }, }, @@ -64,72 +104,52 @@ function makeFakePds(opts: { } as any } -async function postHook( - app: express.Express, - body: Record, - headers: Record = {}, -): Promise<{ status: number; json: Record }> { - const server = app.listen(0) - const port = await new Promise((resolve, reject) => { - server.once('error', reject) - server.once('listening', () => { - const addr = server.address() - if (typeof addr === 'object' && addr) resolve(addr.port) - else reject(new Error('Failed to resolve ephemeral port')) - }) - }) - server.unref() - try { - const res = await fetch( - `http://127.0.0.1:${port}/_internal/test/expire-device-account`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json', ...headers }, - body: JSON.stringify(body), - }, - ) - const json = (await res.json().catch(() => ({}))) as Record - return { status: res.status, json } - } finally { - await new Promise((resolve) => { - server.close(() => { - resolve() - }) - }) - } -} - /** Build an express app with installTestHooks applied to a default - * fake-pds + fake-update fixture. Centralises the boilerplate every test - * used to repeat. Tests that need to inspect the underlying update or - * override the failure mode pass extra options here. */ + * fake-pds + fake-update + fake-delete fixture. Tests that need to + * inspect the underlying update/delete or override the failure mode + * pass extra options here. */ function setupApp(opts?: { failOnExecute?: boolean updatedRows?: number | bigint + failOnDelete?: boolean + deletedRows?: number | bigint }): { app: express.Express fakeUpdate: ReturnType + fakeDelete: ReturnType logger: { warn: ReturnType; error: ReturnType } } { const fakeUpdate = makeFakeUpdate() + const fakeDelete = makeFakeDelete() if (opts?.updatedRows !== undefined) fakeUpdate.setUpdatedRows(opts.updatedRows) + if (opts?.deletedRows !== undefined) + fakeDelete.setDeletedRows(opts.deletedRows) const logger = { warn: vi.fn(), error: vi.fn() } const app = express() installTestHooks({ - pds: makeFakePds({ fakeUpdate, failOnExecute: opts?.failOnExecute }), + pds: makeFakePds({ + fakeUpdate, + fakeDelete, + failOnExecute: opts?.failOnExecute, + failOnDelete: opts?.failOnDelete, + }), app, logger, }) - return { app, fakeUpdate, logger } + return { app, fakeUpdate, fakeDelete, logger } } const SECRET = 'test-secret-1234' const AUTH_HEADER = { 'x-internal-secret': SECRET } -describe('installTestHooks — expire-device-account', () => { - let priorEnv: { hooks?: string; secret?: string; node?: string } = {} +const EXPIRE_PATH = '/_internal/test/expire-device-account' +const DELETE_PAR_PATH = '/_internal/test/delete-par' + +const VALID_REQUEST_URI = 'urn:ietf:params:oauth:request_uri:req-deadbeef' +function useEnvFixture(): void { + let priorEnv: { hooks?: string; secret?: string; node?: string } = {} beforeEach(() => { priorEnv = { hooks: process.env.EPDS_TEST_HOOKS, @@ -140,7 +160,6 @@ describe('installTestHooks — expire-device-account', () => { process.env.EPDS_INTERNAL_SECRET = SECRET process.env.EPDS_TEST_HOOKS = '1' }) - afterEach(() => { if (priorEnv.hooks === undefined) delete process.env.EPDS_TEST_HOOKS else process.env.EPDS_TEST_HOOKS = priorEnv.hooks @@ -149,12 +168,21 @@ describe('installTestHooks — expire-device-account', () => { if (priorEnv.node === undefined) delete process.env.NODE_ENV else process.env.NODE_ENV = priorEnv.node }) +} + +describe('installTestHooks — expire-device-account', () => { + useEnvFixture() it('does nothing when EPDS_TEST_HOOKS is unset', async () => { delete process.env.EPDS_TEST_HOOKS const { app } = setupApp() // Route was never mounted, so the request 404s before any auth check. - const res = await postHook(app, { did: 'did:plc:a' }, AUTH_HEADER) + const res = await postHook( + app, + EXPIRE_PATH, + { did: 'did:plc:a' }, + AUTH_HEADER, + ) expect(res.status).toBe(404) }) @@ -167,7 +195,7 @@ describe('installTestHooks — expire-device-account', () => { it('rejects requests without the internal secret', async () => { const { app } = setupApp() - const res = await postHook(app, { did: 'did:plc:a' }) + const res = await postHook(app, EXPIRE_PATH, { did: 'did:plc:a' }) expect(res.status).toBe(401) }) @@ -175,6 +203,7 @@ describe('installTestHooks — expire-device-account', () => { const { app } = setupApp() const res = await postHook( app, + EXPIRE_PATH, { did: 'did:plc:a' }, { 'x-internal-secret': 'wrong' }, ) @@ -183,14 +212,36 @@ describe('installTestHooks — expire-device-account', () => { it('rejects requests missing the did', async () => { const { app } = setupApp() - const res = await postHook(app, {}, AUTH_HEADER) + const res = await postHook(app, EXPIRE_PATH, {}, AUTH_HEADER) expect(res.status).toBe(400) expect(String(res.json.error)).toMatch(/did/i) }) + it.each([ + { label: 'number', value: 42 }, + { label: 'object', value: { sub: 'did:plc:a' } }, + { label: 'array', value: ['did:plc:a'] }, + { label: 'null', value: null }, + ])( + 'returns 400 (not 500) when did is a non-string $label', + async ({ value }) => { + const { app } = setupApp() + const res = await postHook(app, EXPIRE_PATH, { did: value }, AUTH_HEADER) + // Pre-fix this would crash inside .trim() and yield 500. The + // type-guard now treats non-string values as "missing did". + expect(res.status).toBe(400) + expect(String(res.json.error)).toMatch(/did/i) + }, + ) + it('backdates every device row for the did when no deviceId is given', async () => { const { app, fakeUpdate } = setupApp({ updatedRows: 2 }) - const res = await postHook(app, { did: 'did:plc:a' }, AUTH_HEADER) + const res = await postHook( + app, + EXPIRE_PATH, + { did: 'did:plc:a' }, + AUTH_HEADER, + ) expect(res.status).toBe(200) expect(res.json.updated).toBe(2) @@ -208,6 +259,7 @@ describe('installTestHooks — expire-device-account', () => { const { app, fakeUpdate } = setupApp({ updatedRows: 1 }) const res = await postHook( app, + EXPIRE_PATH, { did: 'did:plc:a', deviceId: 'dev-deadbeef' }, AUTH_HEADER, ) @@ -222,7 +274,12 @@ describe('installTestHooks — expire-device-account', () => { it('returns 500 and logs when the underlying update throws', async () => { const { app, logger } = setupApp({ failOnExecute: true }) - const res = await postHook(app, { did: 'did:plc:a' }, AUTH_HEADER) + const res = await postHook( + app, + EXPIRE_PATH, + { did: 'did:plc:a' }, + AUTH_HEADER, + ) expect(res.status).toBe(500) expect(logger.error).toHaveBeenCalledWith( @@ -234,10 +291,188 @@ describe('installTestHooks — expire-device-account', () => { it('coerces bigint numUpdatedRows to a regular Number', async () => { // Kysely's actual return type is `bigint` on better-sqlite3 driver. const { app } = setupApp({ updatedRows: BigInt(3) }) - const res = await postHook(app, { did: 'did:plc:a' }, AUTH_HEADER) + const res = await postHook( + app, + EXPIRE_PATH, + { did: 'did:plc:a' }, + AUTH_HEADER, + ) expect(res.status).toBe(200) expect(res.json.updated).toBe(3) expect(typeof res.json.updated).toBe('number') }) }) + +describe('installTestHooks — delete-par', () => { + useEnvFixture() + + it('does nothing when EPDS_TEST_HOOKS is unset', async () => { + delete process.env.EPDS_TEST_HOOKS + const { app } = setupApp() + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: VALID_REQUEST_URI }, + AUTH_HEADER, + ) + expect(res.status).toBe(404) + }) + + it('rejects requests without the internal secret', async () => { + const { app } = setupApp() + const res = await postHook(app, DELETE_PAR_PATH, { + request_uri: VALID_REQUEST_URI, + }) + expect(res.status).toBe(401) + }) + + it('rejects requests with the wrong secret', async () => { + const { app } = setupApp() + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: VALID_REQUEST_URI }, + { 'x-internal-secret': 'wrong' }, + ) + expect(res.status).toBe(401) + }) + + it('rejects requests missing the request_uri', async () => { + const { app } = setupApp() + const res = await postHook(app, DELETE_PAR_PATH, {}, AUTH_HEADER) + expect(res.status).toBe(400) + expect(String(res.json.error)).toMatch(/request_uri/i) + }) + + it('rejects malformed request_uri values', async () => { + const { app } = setupApp() + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: 'not-a-par-uri' }, + AUTH_HEADER, + ) + expect(res.status).toBe(400) + }) + + it.each([ + 'urn:ietf:params:oauth:request_uri:req-%', + 'urn:ietf:params:oauth:request_uri:req-%E0%A4%A', + 'urn:ietf:params:oauth:request_uri:req-foo%', + ])( + 'returns 400 (not 500) for request_uri with bad percent-encoding: %s', + async (badUri) => { + const { app } = setupApp() + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: badUri }, + AUTH_HEADER, + ) + // Pre-fix decodeURIComponent would throw URIError inside the + // try block and surface as 500. The decode-and-validate helper + // now catches URIError and surfaces malformed-encoding inputs + // as 400 the same way as a malformed prefix. + expect(res.status).toBe(400) + expect(String(res.json.error)).toMatch(/request_uri/i) + }, + ) + + it.each([ + { label: 'number', value: 42 }, + { label: 'object', value: { uri: VALID_REQUEST_URI } }, + { label: 'array', value: [VALID_REQUEST_URI] }, + { label: 'null', value: null }, + ])( + 'returns 400 (not 500) when request_uri is a non-string $label', + async ({ value }) => { + const { app } = setupApp() + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: value }, + AUTH_HEADER, + ) + // Pre-fix this would crash inside .trim() and yield 500. The + // type-guard now treats non-string values as malformed input. + expect(res.status).toBe(400) + expect(String(res.json.error)).toMatch(/request_uri/i) + }, + ) + + it('deletes the matching authorization_request row', async () => { + const { app, fakeDelete } = setupApp({ deletedRows: 1 }) + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: VALID_REQUEST_URI }, + AUTH_HEADER, + ) + + expect(res.status).toBe(200) + expect(res.json.deleted).toBe(1) + // The hook keys exclusively on id (the request_uri tail, decoded). + expect(fakeDelete.inspect().wheres).toEqual([['id', '=', 'req-deadbeef']]) + }) + + it('returns deleted=0 when no row matches', async () => { + const { app } = setupApp({ deletedRows: 0 }) + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: VALID_REQUEST_URI }, + AUTH_HEADER, + ) + + expect(res.status).toBe(200) + expect(res.json.deleted).toBe(0) + }) + + it('decodes percent-encoded request_uri tails before lookup', async () => { + // Real PAR request_uris come from upstream as `req-${hex}` so they + // don't actually need decoding, but the hook calls + // decodeURIComponent() on the tail — verify the decode path works + // by feeding a tail with an encoded character. + const { app, fakeDelete } = setupApp({ deletedRows: 1 }) + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: 'urn:ietf:params:oauth:request_uri:req-foo%20bar' }, + AUTH_HEADER, + ) + + expect(res.status).toBe(200) + expect(fakeDelete.inspect().wheres).toEqual([['id', '=', 'req-foo bar']]) + }) + + it('returns 500 and logs when the underlying delete throws', async () => { + const { app, logger } = setupApp({ failOnDelete: true }) + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: VALID_REQUEST_URI }, + AUTH_HEADER, + ) + + expect(res.status).toBe(500) + expect(logger.error).toHaveBeenCalledWith( + expect.objectContaining({ requestUri: VALID_REQUEST_URI }), + expect.stringContaining('Failed to delete'), + ) + }) + + it('coerces bigint numDeletedRows to a regular Number', async () => { + const { app } = setupApp({ deletedRows: BigInt(1) }) + const res = await postHook( + app, + DELETE_PAR_PATH, + { request_uri: VALID_REQUEST_URI }, + AUTH_HEADER, + ) + + expect(res.status).toBe(200) + expect(res.json.deleted).toBe(1) + expect(typeof res.json.deleted).toBe('number') + }) +}) diff --git a/packages/pds-core/src/index.ts b/packages/pds-core/src/index.ts index 5d1d1f6d..49b4e953 100644 --- a/packages/pds-core/src/index.ts +++ b/packages/pds-core/src/index.ts @@ -37,12 +37,12 @@ import { createLogger, verifyCallback, verifyInternalSecret, - escapeHtml, validateLocalPart, resolveClientMetadata, getClientCss, getClientMetadataCacheStatus, getEpdsVersion, + renderError, validateClientMetadataForPreview, } from '@certified-app/shared' import { shouldRewriteSecFetchSite } from './lib/sec-fetch-site-rewrite.js' @@ -64,6 +64,7 @@ import { createChooserEnrichmentMiddleware } from './chooser-enrichment.js' import { createUpstreamFaviconMiddleware } from './upstream-favicon.js' import { createAuthUiGuard, parsePromptTokens } from './auth-ui-guard.js' import { loadDeviceAccountEmails } from './lib/device-accounts.js' +import { handleCallbackError } from './lib/epds-callback-error.js' import { installTestHooks } from './lib/test-hooks.js' const logger = createLogger('pds-core') @@ -254,6 +255,18 @@ async function main() { return } + // Captured from Step 2's requestManager.get() — used by the catch + // block to redirect any later failure back to the client per RFC + // 6749 §4.1.2.1, even when the PAR row has since been deleted (in + // particular: RequestManager.get() deletes any expired row in the + // same call that throws AccessDeniedError, so by the time the + // catch block runs there's nothing left to re-read). Empty when + // Step 2 itself threw — i.e. the PAR was already dead on entry, + // the case the @par-callback-error scenario covers — and the + // catch falls through to a styled HTML page in that branch. + let capturedRedirectUri: string | undefined + let capturedState: string | undefined + try { // Step 1: Load or create device session const deviceInfo = await provider.deviceManager.load( @@ -294,6 +307,12 @@ async function main() { // eslint-disable-next-line @typescript-eslint/no-explicit-any -- @atproto/oauth-provider requestManager not exported const requestData = await (provider.requestManager as any).get(requestUri) const { clientId } = requestData + // Stash redirect_uri/state now while the PAR is alive — if a later + // step throws and the row has since been deleted (e.g. flushed + // post-success or the test-only delete-par hook), the catch block + // can still mount an RFC 6749 redirect to the client. + capturedRedirectUri = requestData?.parameters?.redirect_uri + capturedState = requestData?.parameters?.state // Step 3: Resolve or create the account. // Use the PDS accountManager directly — account.sqlite is the single source of truth. @@ -583,36 +602,15 @@ async function main() { 'ePDS callback: redirecting to stock /oauth/authorize for consent/approval', ) } catch (err) { - logger.error({ err }, 'ePDS callback error') - - // Try to redirect error back to client - try { - // eslint-disable-next-line @typescript-eslint/no-explicit-any -- @atproto/oauth-provider requestManager not exported - const requestData = await (provider.requestManager as any).get( - requestUri, - ) - const redirectUri = requestData?.parameters?.redirect_uri - if (redirectUri) { - const errorUrl = new URL(redirectUri) - errorUrl.searchParams.set('error', 'server_error') - errorUrl.searchParams.set( - 'error_description', - 'Authentication failed', - ) - errorUrl.searchParams.set('iss', pdsUrl) - if (requestData.parameters.state) { - errorUrl.searchParams.set('state', requestData.parameters.state) - } - res.redirect(303, errorUrl.toString()) - return - } - } catch { - // Fall through - } - - if (!res.headersSent) { - res.status(500).json({ error: 'Authentication failed' }) - } + handleCallbackError({ + res, + err, + capturedRedirectUri, + capturedState, + pdsUrl, + logger, + renderError, + }) } }) @@ -1201,14 +1199,6 @@ async function checkHandleRoute( } } -function renderError(message: string): string { - return ` - -Error -

${escapeHtml(message)}

-` -} - main().catch((err: unknown) => { logger.fatal({ err }, 'Failed to start ePDS') process.exit(1) diff --git a/packages/pds-core/src/lib/epds-callback-error.ts b/packages/pds-core/src/lib/epds-callback-error.ts new file mode 100644 index 00000000..45656035 --- /dev/null +++ b/packages/pds-core/src/lib/epds-callback-error.ts @@ -0,0 +1,160 @@ +/** + * Error response logic for /oauth/epds-callback. + * + * Lives in its own module so the branching can be unit-tested without + * spinning up the full pds-core stack. The handler in index.ts catches + * any failure inside the callback and delegates to handleCallbackError + * with the captured redirect_uri/state from Step 2's requestManager.get(). + * + * Two response paths: + * 1. RFC 6749 §4.1.2.1 redirect to the OAuth client's redirect_uri, + * with error / error_description / iss / state query params. This + * is the preferred path because the user lands on the client's + * own UI which can offer a retry that fits the app. + * 2. Styled HTML fallback when no redirect_uri is recoverable + * (Step 2 itself threw before populating the captures — the PAR + * was already dead on entry). + * + * Two error classifications: + * - "expired": user-paced timeout. error=access_denied to match + * @atproto/oauth-provider's own choice on parallel paths, with a + * timeout-explaining error_description; HTTP 400 on the HTML + * fallback (4xx because it's a recoverable client problem, not a + * server bug). + * - other: generic server failure. error=server_error per spec, 500 + * on the HTML fallback. + */ +import type { Response } from 'express' +import type { Logger } from 'pino' + +/** Decoded view of a thrown error for response shaping. */ +export interface CallbackErrorClassification { + /** RFC 6749 §4.1.2.1 error code to surface on the redirect / page. */ + code: 'access_denied' | 'server_error' + /** Human-readable copy intended for direct display to the user. */ + description: string + /** True when the failure is a recognised dead-PAR signal. */ + isExpired: boolean +} + +/** + * The dead-PAR pattern we recognise. Matches the upstream-thrown + * messages for both flavours of dead PAR: + * - "This request has expired" (AccessDeniedError, row was alive at + * request time but past its expiresAt) + * - "Unknown request_uri" (InvalidRequestError, row was already + * gone — what /_internal/test/delete-par produces, and what + * happens after a second callback hit since the first call's + * deleteRequest swept the row) + * We also keep a generic "expired" / "invalid_grant" catch-all in + * case upstream rewords its messages in a future patch release. + * + * Exported as a constant so tests can import the same regex they're + * verifying without re-typing it. + */ +export const EXPIRED_PAR_MESSAGE_PATTERN = + /request has expired|unknown request_uri|invalid_grant|expired/i + +const EXPIRED_DESCRIPTION = + 'Your sign-in took too long to complete and timed out. Please start sign-in again.' +const SERVER_ERROR_DESCRIPTION = 'Authentication failed.' + +export function classifyCallbackError( + err: unknown, +): CallbackErrorClassification { + const message = err instanceof Error ? err.message : String(err) + const isExpired = EXPIRED_PAR_MESSAGE_PATTERN.test(message) + return { + code: isExpired ? 'access_denied' : 'server_error', + description: isExpired ? EXPIRED_DESCRIPTION : SERVER_ERROR_DESCRIPTION, + isExpired, + } +} + +export interface HandleCallbackErrorOpts { + res: Response + err: unknown + /** redirect_uri stashed from Step 2's requestManager.get(); empty when + * Step 2 itself threw. */ + capturedRedirectUri: string | undefined + /** state stashed from Step 2's requestManager.get(); preserved on the + * redirect for CSRF round-trip. */ + capturedState: string | undefined + /** Issuer identifier per RFC 9207, set on the redirect. */ + pdsUrl: string + logger: Pick + /** Renders the styled HTML fallback page. Injected so tests can + * assert on the rendered string without pulling in the real + * renderer. */ + renderError: (message: string) => string +} + +export function handleCallbackError(opts: HandleCallbackErrorOpts): void { + const { + res, + err, + capturedRedirectUri, + capturedState, + pdsUrl, + logger, + renderError, + } = opts + + const { code, description, isExpired } = classifyCallbackError(err) + + // PAR-expiry is an expected user-paced timeout, not a server fault. + // Log it at `warn` so it stays visible in operational logs without + // tripping error-level alerting once expiry becomes routine in + // production. Anything else (account creation failed, store down, + // etc.) is genuinely server-side and stays at `error`. + if (isExpired) { + logger.warn({ err }, 'ePDS callback: sign-in timed out') + } else { + logger.error({ err }, 'ePDS callback error') + } + + if (!res.headersSent && capturedRedirectUri) { + // The redirect_uri was captured from a successful Step-2 read of + // the upstream PAR row, and @atproto/oauth-provider validates the + // URL at PAR creation, so this is essentially defensive — but the + // catch block exists precisely to spare the user a 500. If + // `new URL()` ever does throw (upstream invariant changes, + // test-only hook injects garbage, etc.), log it and fall through + // to the HTML fallback rather than crashing the error handler + // itself. + let errorUrl: URL | null = null + try { + errorUrl = new URL(capturedRedirectUri) + } catch (urlErr) { + logger.error( + { err: urlErr, capturedRedirectUri }, + 'ePDS callback: captured redirect_uri is not a valid URL — falling back to HTML error page', + ) + } + if (errorUrl) { + // Cache-Control: no-store on the redirect so a browser or + // intermediary doesn't preserve the per-attempt `state` / + // error_description query params (would leak across users on a + // shared cache) and doesn't replay a stale 303 on refresh, + // which would skip the user past a fresh sign-in attempt. + res.setHeader('Cache-Control', 'no-store') + errorUrl.searchParams.set('error', code) + errorUrl.searchParams.set('error_description', description) + errorUrl.searchParams.set('iss', pdsUrl) + if (capturedState) errorUrl.searchParams.set('state', capturedState) + res.redirect(303, errorUrl.toString()) + return + } + } + + if (!res.headersSent) { + // Cache-Control: no-store on the HTML page too — the page is + // produced from per-request state, so a cached copy is at best + // misleading on a later attempt. + res.setHeader('Cache-Control', 'no-store') + res + .status(isExpired ? 400 : 500) + .type('html') + .send(renderError(description)) + } +} diff --git a/packages/pds-core/src/lib/test-hooks.ts b/packages/pds-core/src/lib/test-hooks.ts index 4b5aca02..d8984886 100644 --- a/packages/pds-core/src/lib/test-hooks.ts +++ b/packages/pds-core/src/lib/test-hooks.ts @@ -1,7 +1,7 @@ /** * E2E test-only hooks. Mounted only when EPDS_TEST_HOOKS=1 and refused * outright when NODE_ENV=production. Mirrors auth-service's /_internal/test/* - * pattern: narrow UPDATEs that backdate a single timestamp to reproduce + * pattern: narrow UPDATEs / DELETEs that mutate one row to reproduce * time-dependent behaviour without waiting out the wall-clock TTL. * * Currently exposes: @@ -11,12 +11,41 @@ * matching row(s). Used by the e2e suite to age bindings past * upstream's authenticationMaxAge (7d) so checkLoginRequired returns * true for the targeted binding(s). + * + * POST /_internal/test/delete-par + * Body: {request_uri} + * Deletes the matching `authorization_request` row. Used by the + * @par-callback-error scenario to reproduce the production failure + * where /oauth/epds-callback hits an expired/missing PAR — the + * fix in the same commit responds with a friendly OAuth-spec + * redirect (or styled HTML) instead of leaking + * `{"error": "Authentication failed"}` JSON. */ import express, { type Application } from 'express' import type { PDS } from '@atproto/pds' import { verifyInternalSecret } from '@certified-app/shared' import type { Logger } from 'pino' +const REQUEST_URI_PREFIX = 'urn:ietf:params:oauth:request_uri:' + +/** + * Validate the prefix AND attempt the URL-decode in one step, so a + * malformed `%`-escape (e.g. `req-%`) is treated as malformed input + * (400) rather than crashing the catch block and being reported as + * a 500 server error. Returns the decoded request id when valid, + * otherwise null. The caller logs the rejection so a malformed + * payload still leaves a breadcrumb. + */ +function decodeAndValidateRequestUri(value: string): string | null { + if (!value.startsWith(`${REQUEST_URI_PREFIX}req-`)) return null + try { + return decodeURIComponent(value.slice(REQUEST_URI_PREFIX.length)) + } catch (err) { + if (err instanceof URIError) return null + throw err + } +} + export function installTestHooks(opts: { pds: PDS app: Application @@ -41,7 +70,8 @@ export function installTestHooks(opts: { res.status(401).json({ error: 'Unauthorized' }) return } - const did = ((req.body?.did as string) || '').trim() + const rawDid: unknown = req.body?.did + const did = typeof rawDid === 'string' ? rawDid.trim() : '' const deviceId = typeof req.body?.deviceId === 'string' ? req.body.deviceId.trim() @@ -82,4 +112,43 @@ export function installTestHooks(opts: { } }, ) + + app.post('/_internal/test/delete-par', express.json(), async (req, res) => { + if (!verifyInternalSecret(req.headers['x-internal-secret'])) { + res.status(401).json({ error: 'Unauthorized' }) + return + } + const rawRequestUri: unknown = req.body?.request_uri + const requestUri = + typeof rawRequestUri === 'string' ? rawRequestUri.trim() : '' + const requestId = requestUri + ? decodeAndValidateRequestUri(requestUri) + : null + if (!requestId) { + res.status(400).json({ error: 'Missing or malformed request_uri' }) + return + } + try { + // Same Kysely instance as expire-device-account above. The + // authorization_request table is owned by @atproto/pds — see + // its account-manager/db/schema/authorization-request.ts. We + // narrow the cast to a single column lookup so the absence + // of a typed schema here doesn't propagate. + // eslint-disable-next-line @typescript-eslint/no-explicit-any -- authorization_request shape not exported by @atproto/pds + const db = pds.ctx.accountManager.db.db as any + const result = await db + .deleteFrom('authorization_request') + .where('id', '=', requestId) + .executeTakeFirst() + const deleted = Number(result?.numDeletedRows ?? 0) + logger.warn( + { requestUri, deleted }, + 'Deleted PAR row — /oauth/epds-callback will treat this as expired', + ) + res.json({ deleted }) + } catch (err) { + logger.error({ err, requestUri }, 'Failed to delete PAR row') + res.status(500).json({ error: 'Internal server error' }) + } + }) } diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index 16988b3d..8b906e15 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -74,3 +74,7 @@ export type { export { getEpdsVersion } from './version.js' export { makeSafeFetch } from './safe-fetch.js' export type { SafeFetchOptions } from './safe-fetch.js' +export { postHook } from './test-utils/post-hook.js' +export type { PostHookResult } from './test-utils/post-hook.js' +export { ERROR_CSS, renderError } from './render-error.js' +export type { RenderErrorOptions } from './render-error.js' diff --git a/packages/shared/src/render-error.ts b/packages/shared/src/render-error.ts new file mode 100644 index 00000000..e19c3d4e --- /dev/null +++ b/packages/shared/src/render-error.ts @@ -0,0 +1,65 @@ +import { escapeHtml } from './html.js' + +/** + * CSS shared across every styled error page in the project. Both + * auth-service and pds-core consume it as-is. Layout is a centred + * white card on a light-grey body, designed to look reasonable + * regardless of which host serves it. + * + * Auth-service composes additional rules on top (the "Powered by + * Certified" footer) — see `auth-service/src/lib/render-error.ts`. + */ +export const ERROR_CSS = ` + * { box-sizing: border-box; margin: 0; padding: 0; } + body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f5f5f5; min-height: 100vh; display: flex; align-items: center; justify-content: center; padding: 20px; } + .page-wrap { display: flex; flex-direction: column; align-items: stretch; max-width: 420px; width: 100%; } + .container { background: white; border-radius: 12px; padding: 40px; width: 100%; box-shadow: 0 2px 8px rgba(0,0,0,0.08); text-align: center; } + h1 { font-size: 24px; margin-bottom: 16px; color: #111; } + .error { color: #dc3545; background: #fdf0f0; padding: 12px; border-radius: 8px; font-size: 15px; line-height: 1.5; } +` + +export interface RenderErrorOptions { + /** Page and the in-page <h1>. Defaults to "Error". */ + title?: string + /** Extra CSS to append after ERROR_CSS, e.g. auth-service's + * powered-by footer rules. */ + extraCss?: string + /** Additional HTML inserted INSIDE `.page-wrap` after the + * `.container`, e.g. auth-service's powered-by footer link. + * Caller is responsible for ensuring this string is safe HTML — + * it is not escaped. */ + bodyExtra?: string +} + +/** + * Render a styled HTML error page. Used by every endpoint that needs + * to surface a recoverable problem to the user without leaking JSON + * or sending the user to a stack trace. Status codes are the + * caller's responsibility — this only produces the body. + */ +export function renderError( + message: string, + options: RenderErrorOptions = {}, +): string { + const { title = 'Error', extraCss = '', bodyExtra = '' } = options + return `<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <link rel="icon" href="/static/favicon.svg" media="(prefers-color-scheme: light)" type="image/svg+xml"> + <link rel="icon" href="/static/favicon-dark.svg" media="(prefers-color-scheme: dark)" type="image/svg+xml"> + <title>${escapeHtml(title)} + + + +
+
+

${escapeHtml(title)}

+

${escapeHtml(message)}

+
+ ${bodyExtra} +
+ +` +} diff --git a/packages/shared/src/test-utils/post-hook.ts b/packages/shared/src/test-utils/post-hook.ts new file mode 100644 index 00000000..73cf0b5f --- /dev/null +++ b/packages/shared/src/test-utils/post-hook.ts @@ -0,0 +1,65 @@ +import type { Server } from 'node:http' + +/** + * Anything with `listen(0)` returning a Node `http.Server` (or + * something Server-shaped: `address()`, `unref()`, `close()`). Both + * Express's `app.listen` and the raw `node:http` `createServer` + * satisfy this. Typed structurally so this module doesn't pull + * `@types/express` (or `express` as a runtime dep) into + * `@certified-app/shared`. + */ +type Listenable = { + listen(port: number): Server +} + +export interface PostHookResult { + status: number + json: Record +} + +/** + * Spin up a server-like on an ephemeral port, POST a JSON body to a + * path on it, tear the server down. Used by both auth-service's and + * pds-core's test-hooks suites so each drives its installer through a + * real HTTP roundtrip without standing up the full service. + */ +export async function postHook( + app: Listenable, + hookPath: string, + body: Record, + headers: Record = {}, +): Promise { + const server = app.listen(0) + // Single try/finally covering BOTH the port-resolution step and the + // fetch step. If `'listening'` fires with an unexpected address shape + // (or the server emits `'error'` before listening), the inner promise + // rejects and we still hit `closeServer` rather than leaking the + // listener and hanging the test runner. + try { + server.unref() + const port = await new Promise((resolve, reject) => { + server.once('error', reject) + server.once('listening', () => { + const addr = server.address() + if (typeof addr === 'object' && addr) { + resolve(addr.port) + } else { + reject(new Error('Failed to resolve ephemeral port')) + } + }) + }) + const res = await fetch(`http://127.0.0.1:${port}${hookPath}`, { + method: 'POST', + headers: { 'Content-Type': 'application/json', ...headers }, + body: JSON.stringify(body), + }) + const json = (await res.json().catch(() => ({}))) as Record + return { status: res.status, json } + } finally { + await new Promise((resolve) => { + server.close(() => { + resolve() + }) + }) + } +}