Skip to content

Commit cdf5884

Browse files
heiskr and Copilot authored
Collapse runs of blank lines in transformer markdown output (#61480)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent f50c571 commit cdf5884

4 files changed

Lines changed: 78 additions & 2 deletions

File tree

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
2+
* Post-processing for transformer-produced markdown that is about to be sent
3+
* to the client (via the `.md` URL suffix, `Accept: text/markdown`, or the
4+
* article-body API). Kept intentionally small so the same rules apply to
5+
* every transformer's output without each one having to opt in.
6+
*/
7+
8+
/**
 * Collapse runs of three or more consecutive line breaks down to two, so that
 * at most one blank line separates adjacent blocks.
 *
 * Transformers that render conditional sections often leave several blank
 * lines behind when a section turns out to be empty. The markdown is still
 * valid, but the `.md` output is visually noisy.
 *
 * Both LF (`\n`) and CRLF (`\r\n`) line breaks are recognized. The first two
 * breaks of each run are kept verbatim, so the line-ending style already
 * present in the document is preserved and LF-only input is collapsed to
 * exactly `\n\n`.
 *
 * The replacement is applied to the whole string without any awareness of
 * markdown structure, so runs inside fenced code blocks are collapsed too.
 *
 * @param content - Markdown text produced by a transformer.
 * @returns The text with every run of 3+ line breaks reduced to two.
 */
export function collapseBlankLines(content: string): string {
  // Capture the first two breaks and drop every additional one in the run.
  return content.replace(/(\r?\n)(\r?\n)(?:\r?\n)+/g, '$1$2')
}
17+
18+
/**
 * Single entry point for the clean-up passes that transformer-produced
 * markdown goes through before it leaves the server. Keeping them together
 * means a new rule (for example, stripping trailing whitespace) only has to
 * be added here.
 *
 * @param content - Markdown text produced by a transformer.
 * @returns The normalized markdown; at present the only pass is blank-line
 *   collapsing.
 */
export function normalizeRenderedMarkdown(content: string): string {
  const normalized = collapseBlankLines(content)
  return normalized
}

src/article-api/middleware/article-body.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import features from '@/versions/middleware/features'
77
import glossaries from '@/frame/middleware/context/glossaries'
88
import dataTables from '@/data-directory/middleware/data-tables'
99
import { transformerRegistry } from '@/article-api/transformers'
10+
import { normalizeRenderedMarkdown } from '@/article-api/lib/normalize-markdown'
1011
import { allVersions } from '@/versions/lib/all-versions'
1112
import type { Page } from '@/types'
1213

@@ -72,5 +73,7 @@ export async function getArticleBody(req: ExtendedRequestWithPageInfo) {
7273
effectiveApiVersion = allVersions[currentVersion].latestApiVersion || undefined
7374
}
7475

75-
return await transformer.transform(page, pathname, renderingReq.context, effectiveApiVersion)
76+
return normalizeRenderedMarkdown(
77+
await transformer.transform(page, pathname, renderingReq.context, effectiveApiVersion),
78+
)
7679
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import { describe, expect, test } from 'vitest'
2+
3+
import { collapseBlankLines, normalizeRenderedMarkdown } from '@/article-api/lib/normalize-markdown'
4+
5+
// Unit tests for collapseBlankLines: each case feeds a literal markdown
// string through the function and compares the exact resulting string.
describe('collapseBlankLines', () => {
  test('collapses 3+ newlines down to 2', () => {
    const collapsed = collapseBlankLines('one\n\n\n\ntwo')
    expect(collapsed).toBe('one\n\ntwo')
  })

  test('leaves single blank lines untouched', () => {
    const collapsed = collapseBlankLines('one\n\ntwo')
    expect(collapsed).toBe('one\n\ntwo')
  })

  test('leaves single newlines untouched', () => {
    const collapsed = collapseBlankLines('one\ntwo')
    expect(collapsed).toBe('one\ntwo')
  })

  test('handles multiple runs across the document', () => {
    const collapsed = collapseBlankLines('a\n\n\nb\n\n\n\nc\n\nd')
    expect(collapsed).toBe('a\n\nb\n\nc\n\nd')
  })

  test('preserves trailing newline', () => {
    const collapsed = collapseBlankLines('one\n\n\ntwo\n')
    expect(collapsed).toBe('one\n\ntwo\n')
  })

  test('also collapses blank-line runs inside fenced code blocks', () => {
    // Blank lines inside a ``` fence survive into the rendered <pre><code>,
    // so collapsing them does alter what the code block looks like. This is
    // deliberate: three or more blank lines in a code sample are treated as
    // the same noise that gets collapsed everywhere else.
    const fenced = 'before\n\n```\ncode\n\n\n\nmore code\n```\n\nafter'
    const collapsed = collapseBlankLines(fenced)
    expect(collapsed).toBe('before\n\n```\ncode\n\nmore code\n```\n\nafter')
  })
})
40+
41+
// Smoke test: the aggregate normalizer must include the blank-line collapsing pass.
describe('normalizeRenderedMarkdown', () => {
  test('applies blank-line collapsing', () => {
    const normalized = normalizeRenderedMarkdown('a\n\n\n\nb')
    expect(normalized).toBe('a\n\nb')
  })
})

src/frame/middleware/render-page.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import statsd, { adaptForTimer } from '@/observability/lib/statsd'
1111
import type { ExtendedRequest } from '@/types'
1212
import { allVersions } from '@/versions/lib/all-versions'
1313
import { transformerRegistry } from '@/article-api/transformers'
14+
import { normalizeRenderedMarkdown } from '@/article-api/lib/normalize-markdown'
1415
import { minimumNotFoundHtml } from '../lib/constants'
1516
import { contentTypeCacheControl, defaultCacheControl } from './cache-control'
1617
import { nextHandleRequest } from './next'
@@ -113,7 +114,9 @@ export default async function renderPage(req: ExtendedRequest, res: Response) {
113114
// causes renderTitle/renderProp to output markdown instead of HTML,
114115
// which breaks the cheerio-based unwrap logic.
115116
const transformerContext = { ...context, markdownRequested: false }
116-
req.context.renderedPage = await transformer.transform(page, path, transformerContext)
117+
req.context.renderedPage = normalizeRenderedMarkdown(
118+
await transformer.transform(page, path, transformerContext),
119+
)
117120
} else {
118121
req.context.renderedPage = await buildRenderedPage(req)
119122
req.context.miniTocItems = buildMiniTocItems(req)

0 commit comments

Comments
 (0)