diff --git a/.changeset/popular-trainers-check.md b/.changeset/popular-trainers-check.md new file mode 100644 index 0000000..c395b69 --- /dev/null +++ b/.changeset/popular-trainers-check.md @@ -0,0 +1,5 @@ +--- +'markdownlayer': minor +--- + +Support transforming links and images with exclusion of document files diff --git a/examples/starter/src/content/blog-posts/post-2/dice.jpg b/examples/starter/src/content/blog-posts/post-2/dice.jpg new file mode 100644 index 0000000..bef8893 Binary files /dev/null and b/examples/starter/src/content/blog-posts/post-2/dice.jpg differ diff --git a/examples/starter/src/content/blog-posts/post-2/index.md b/examples/starter/src/content/blog-posts/post-2/index.md index 5f3ed72..3c7dccb 100644 --- a/examples/starter/src/content/blog-posts/post-2/index.md +++ b/examples/starter/src/content/blog-posts/post-2/index.md @@ -5,4 +5,12 @@ published: 2024-01-31 image: cover.jpg --- -Some content for post 2 +## Hello World + +> Hello **World** + +Continuing from [post 1](../post-1/index.md) + +Some content. Download file [here](./sample.txt). + +![A linked image](./dice.jpg) diff --git a/examples/starter/src/content/blog-posts/post-2/sample.txt b/examples/starter/src/content/blog-posts/post-2/sample.txt new file mode 100644 index 0000000..8f4b688 --- /dev/null +++ b/examples/starter/src/content/blog-posts/post-2/sample.txt @@ -0,0 +1 @@ +This is a dummy file to be downloaded. 
diff --git a/packages/markdownlayer/src/assets.test.ts b/packages/markdownlayer/src/assets.test.ts index 4574ec9..95fbfe7 100644 --- a/packages/markdownlayer/src/assets.test.ts +++ b/packages/markdownlayer/src/assets.test.ts @@ -57,7 +57,7 @@ describe('processAsset', () => { vi.mocked(readFile).mockResolvedValue(buffer); - const metadata = await processAsset({ input, from, format, baseUrl: '/static/' }); + const metadata = await processAsset({ input, from, format, baseUrl: '/static/' }, true); expect(metadata).toEqual({ src: '/static/image.e23151fe.png', format: 'png', @@ -72,7 +72,23 @@ describe('processAsset', () => { // Check that the asset is added to the assets map expect(assets['image.e23151fe.png']).toBe(join(from, '..') + '/' + input); }); + + it('should handle non-image assets gracefully', async () => { + const buffer = Buffer.from('some non-image data'); + const input = 'document.txt'; + const from = __dirname; + const format = '[name].[hash:8].[ext]'; + + vi.mocked(readFile).mockResolvedValue(buffer); + + const url = await processAsset({ input, from, format, baseUrl: '/static/' }); + expect(url).toBe('/static/document.ed975c52.txt'); + + // Check that the asset is added to the assets map + expect(assets['document.ed975c52.txt']).toBe(join(from, '..') + '/' + input); + }); }); + describe('isValidImageFormat', () => { it('should return true for valid image formats', () => { expect(isValidImageFormat('jpeg')).toBe(true); diff --git a/packages/markdownlayer/src/assets.ts b/packages/markdownlayer/src/assets.ts index 7dd6cc5..4c1f1da 100644 --- a/packages/markdownlayer/src/assets.ts +++ b/packages/markdownlayer/src/assets.ts @@ -27,17 +27,20 @@ export async function getImageMetadata(buffer: Buffer): Promise { +export async function processAsset( + { + input, + from, + format, + baseUrl, + }: { + input: string; + from: string; + format: string; + baseUrl: string; + }, + isImage?: T, +): Promise { // e.g. 
input = '../assets/image.png?foo=bar#hash' const queryIdx = input.indexOf('?'); const hashIdx = input.indexOf('#'); @@ -63,9 +66,11 @@ export async function processAsset({ const src = baseUrl + name + suffix; assets[name] = path; // track asset for copying later + if (!isImage) return src as T extends true ? ImageData : string; + const metadata = await getImageMetadata(buffer); if (metadata == null) throw new Error(`invalid image: ${from}`); - return { src, ...metadata }; + return { src, ...metadata } as T extends true ? ImageData : string; } export function isValidImageFormat(format: keyof FormatEnum): format is ImageFormat { diff --git a/packages/markdownlayer/src/bundle.ts b/packages/markdownlayer/src/bundle.ts index 4d6e963..9cb3348 100644 --- a/packages/markdownlayer/src/bundle.ts +++ b/packages/markdownlayer/src/bundle.ts @@ -4,7 +4,6 @@ import type { Options as CompileOptions } from '@mdx-js/esbuild'; import mdxESBuild from '@mdx-js/esbuild'; import type { BuildOptions, Plugin } from 'esbuild'; import esbuild, { type Message } from 'esbuild'; -import { StringDecoder } from 'node:string_decoder'; import rehypeRaw, { type Options as RehypeRawOptions } from 'rehype-raw'; import remarkDirective from 'remark-directive'; import remarkEmoji from 'remark-emoji'; @@ -12,35 +11,33 @@ import remarkFrontmatter from 'remark-frontmatter'; import remarkGfm from 'remark-gfm'; import type { Pluggable, PluggableList } from 'unified'; -import { remarkAdmonitions, remarkHeadings } from './remark'; -import type { DocumentFormat, MarkdownlayerConfigPlugins } from './types'; +import { remarkAdmonitions, remarkHeadings, remarkTransformLinks } from './remark'; +import type { DocumentFormat, ResolvedConfig } from './types'; export type BundleProps = { - entryPath: string; + config: ResolvedConfig; + path: string; contents: string; format: DocumentFormat; - plugins: MarkdownlayerConfigPlugins; frontmatter: Record; }; export type BundleResult = { code: string; errors: Message[] }; - 
-export async function bundle({ format, ...options }: BundleProps): Promise { +export async function bundle({ format, config, ...options }: BundleProps): Promise { switch (format) { case 'md': case 'mdx': - return await mdx({ format, ...options }); + return await mdx({ config, format, ...options }); case 'mdoc': - return await mdoc({ ...options }); + return await mdoc({ config, ...options }); default: throw new Error(`Unsupported format: ${format}`); } } -type BundleMdocProps = Omit; - -async function mdoc({ contents, plugins: { markdoc }, frontmatter }: BundleMdocProps): Promise { +type BundleMdocProps = Pick; +async function mdoc({ config: { markdoc }, contents, frontmatter }: BundleMdocProps): Promise { const { allowComments = true, allowIndentation = true, slots, transformConfig } = markdoc ?? {}; const tokenizer = new Markdoc.Tokenizer({ allowComments, allowIndentation }); const tokens = tokenizer.tokenize(contents); @@ -60,16 +57,13 @@ async function mdoc({ contents, plugins: { markdoc }, frontmatter }: BundleMdocP }; } -type BundleMdxProps = Omit & { format: 'md' | 'mdx' }; - -const decoder = new StringDecoder('utf8'); - -async function mdx({ entryPath, contents, format, plugins }: BundleMdxProps): Promise { +type BundleMdxProps = Pick & { format: 'md' | 'mdx' }; +async function mdx({ config, path, contents, format }: BundleMdxProps): Promise { const inMemoryPlugin: Plugin = { name: 'in-memory-plugin', setup(build) { build.onResolve({ filter: /.*/ }, ({ path: filePath }) => { - if (filePath === entryPath) { + if (filePath === path) { return { path: filePath, pluginData: { inMemory: true, contents: contents }, @@ -82,17 +76,73 @@ async function mdx({ entryPath, contents, format, plugins }: BundleMdxProps): Pr }, }; - const compileOptions = getCompileOptions({ format, plugins }); + const { + admonitions = true, + emoji = true, + gfm = true, + transformLinks = true, + recmaPlugins, + remarkPlugins, + rehypePlugins, + remarkRehypeOptions, + } = config; + + const 
compileOptions: CompileOptions = { + format, + recmaPlugins, + rehypePlugins, + remarkRehypeOptions, + + // configure remark plugins + remarkPlugins: [ + // standard plugins + remarkFrontmatter, + remarkDirective, // necessary to handle all types of directives including admonitions (containerDirective) + ...((admonitions + ? [admonitions === true ? remarkAdmonitions : [remarkAdmonitions, admonitions]] + : []) as PluggableList), + remarkHeadings, // must be added before handling of ToC and links + ...((emoji ? [emoji === true ? remarkEmoji : [remarkEmoji, emoji]] : []) as PluggableList), + ...((gfm ? [gfm === true ? remarkGfm : [remarkGfm, gfm]] : []) as PluggableList), + ...((transformLinks + ? [[remarkTransformLinks, transformLinks === true ? { config } : { config, ...transformLinks }]] + : []) as PluggableList), + + // user-provided plugins + ...(remarkPlugins ?? []), + ], + }; + + if (format === 'md') { + // This is what permits to embed HTML elements with format 'md' + // See https://github.com/facebook/docusaurus/pull/8960 + // See https://github.com/mdx-js/mdx/pull/2295#issuecomment-1540085960 + const rehypeRawPlugin: Pluggable = [ + rehypeRaw, + { + passThrough: [ + 'mdxFlowExpression', + 'mdxTextExpression', + // jsx, js + 'mdxJsxFlowElement', + 'mdxJsxTextElement', + 'mdxjsEsm', + ], + } satisfies RehypeRawOptions, + ]; + compileOptions.rehypePlugins!.unshift(rehypeRawPlugin); + } + const buildOptions: BuildOptions = { - entryPoints: [entryPath], + entryPoints: [path], write: false, bundle: true, + target: 'es2020', format: 'iife', globalName: 'Component', - minify: false, // let the bundling framework handle the minification - splitting: false, treeShaking: false, - target: 'es2020', + splitting: false, + minify: false, // let the bundling framework handle the minification keepNames: true, plugins: [ globalExternals({ @@ -107,79 +157,10 @@ async function mdx({ entryPath, contents, format, plugins }: BundleMdxProps): Pr }; const bundled = await 
esbuild.build(buildOptions); - const code = decoder.write(Buffer.from(bundled.outputFiles![0].contents)); + const code = bundled.outputFiles![0].text; return { code: `${code};return Component;`, errors: bundled.errors, }; } - -type GetCompileOptionsProps = { format: 'md' | 'mdx'; plugins: MarkdownlayerConfigPlugins }; -type ProcessorCacheEntry = { format: DocumentFormat; options: CompileOptions }; - -const ProcessorsCache = new Map(); - -function getCompileOptions({ format, plugins }: GetCompileOptionsProps): CompileOptions { - let cacheEntry = ProcessorsCache.get(format); - const { - admonitions = true, - emoji = true, - gfm = true, - recmaPlugins, - remarkPlugins, - rehypePlugins, - remarkRehypeOptions, - } = plugins; - - if (!cacheEntry) { - const options: CompileOptions = { - format, - recmaPlugins, - rehypePlugins, - remarkRehypeOptions, - - // configure remark plugins - remarkPlugins: [ - // standard plugins - remarkFrontmatter, - remarkDirective, // necessary to handle all types of directives including admonitions (containerDirective) - ...((admonitions - ? [admonitions === true ? remarkAdmonitions : [remarkAdmonitions, admonitions]] - : []) as PluggableList), - remarkHeadings, // must be added before handling of ToC and links - ...((emoji ? [emoji === true ? remarkEmoji : [remarkEmoji, emoji]] : []) as PluggableList), - // remarkToc, - ...((gfm ? [gfm === true ? remarkGfm : [remarkGfm, gfm]] : []) as PluggableList), - - // user-provided plugins - ...(remarkPlugins ?? 
[]), - ], - }; - - if (format === 'md') { - // This is what permits to embed HTML elements with format 'md' - // See https://github.com/facebook/docusaurus/pull/8960 - // See https://github.com/mdx-js/mdx/pull/2295#issuecomment-1540085960 - const rehypeRawPlugin: Pluggable = [ - rehypeRaw, - { - passThrough: [ - 'mdxFlowExpression', - 'mdxTextExpression', - // jsx, js - 'mdxJsxFlowElement', - 'mdxJsxTextElement', - 'mdxjsEsm', - ], - } satisfies RehypeRawOptions, - ]; - options.rehypePlugins!.unshift(rehypeRawPlugin); - } - - cacheEntry = { format, options }; - ProcessorsCache.set(format, cacheEntry); - } - - return cacheEntry.options; -} diff --git a/packages/markdownlayer/src/remark/index.ts b/packages/markdownlayer/src/remark/index.ts index 6ee2f4a..da5ece4 100644 --- a/packages/markdownlayer/src/remark/index.ts +++ b/packages/markdownlayer/src/remark/index.ts @@ -1,5 +1,7 @@ export type * from './admonitions'; export type * from './headings'; +export type * from './transform-links'; export { default as remarkAdmonitions } from './admonitions'; export { default as remarkHeadings } from './headings'; +export { default as remarkTransformLinks } from './transform-links'; diff --git a/packages/markdownlayer/src/remark/transform-links.ts b/packages/markdownlayer/src/remark/transform-links.ts new file mode 100644 index 0000000..91c5437 --- /dev/null +++ b/packages/markdownlayer/src/remark/transform-links.ts @@ -0,0 +1,71 @@ +import type { Definition, Image, Link, Node } from 'mdast'; +import { extname } from 'path'; +import type { Transformer } from 'unified'; +import { visit } from 'unist-util-visit'; +import { processAsset } from '../assets'; +import type { ResolvedConfig } from '../types'; +import { isRelativePath } from '../utils'; + +// Default extensions to be excluded +const DefaultExcludedExtensions = [ + // markdown + '.md', + '.markdown', + '.mdown', + '.mkdn', + '.mkd', + '.mdwn', + '.mkdown', + '.ron', + + // mdx and mdoc + '.mdx', + '.mdoc', +]; + +export 
type RemarkTransformLinksOptions = { + // TODO: transform these links by checking for the references instead of skipping them + /** + * Extensions that should not be transformed. + * @default ['.md', '.markdown', '.mdown', '.mkdn', '.mkd', '.mdwn', '.mkdown', '.ron', '.mdx', '.mdoc'] + */ + excludeExtensions?: readonly string[]; +}; + +type Options = RemarkTransformLinksOptions & { config: ResolvedConfig }; +export default function remarkTransformLinks({ + excludeExtensions = DefaultExcludedExtensions, + config, +}: Options): Transformer { + const { output } = config; + return async (root, file) => { + const links: Record = {}; + + // image e.g. ![alt](./image.png) + // link e.g. [link](./link.md) or [file](./file.txt) + // definition e.g. [id]: ./file.txt (the target of a reference such as [link][id]) + visit(root, ['link', 'image', 'definition'], (n: Node) => { + const node = n as Link | Image | Definition; + const { url: src } = node; + if (excludeExtensions.includes(extname(src))) return; + if (isRelativePath(src)) { + const nodes = links[src] || []; + nodes.push(node); + links[src] = nodes; + } + }); + + await Promise.all( + Object.entries(links).map(async ([src, nodes]) => { + const url = await processAsset({ input: src, from: file.path, format: output.format, baseUrl: output.base }); + if (!url || url === src) return; + for (const node of nodes) { + if (node.url === src) { + node.url = url; + continue; + } + } + }), + ); + }; +} diff --git a/packages/markdownlayer/src/schemas/body.ts b/packages/markdownlayer/src/schemas/body.ts index c257279..fc0ed94 100644 --- a/packages/markdownlayer/src/schemas/body.ts +++ b/packages/markdownlayer/src/schemas/body.ts @@ -52,9 +52,9 @@ export function body({ // bundle the document const options: BundleProps = { contents, - entryPath: path, + path, format, - plugins: { ...config }, + config, frontmatter, }; const { code, errors } = await bundle(options); diff --git a/packages/markdownlayer/src/schemas/image.ts b/packages/markdownlayer/src/schemas/image.ts index 
2554fd3..f854c04 100644 --- a/packages/markdownlayer/src/schemas/image.ts +++ b/packages/markdownlayer/src/schemas/image.ts @@ -68,7 +68,7 @@ export function image({ remote = false, emit = true, path, config: { output } }: } return { - ...(await processAsset({ input: src, from: path, format: output.format, baseUrl: output.base })), + ...(await processAsset({ input: src, from: path, format: output.format, baseUrl: output.base }, true)), alt, }; } catch (error) { diff --git a/packages/markdownlayer/src/types.ts b/packages/markdownlayer/src/types.ts index 2d86b20..0d576ee 100644 --- a/packages/markdownlayer/src/types.ts +++ b/packages/markdownlayer/src/types.ts @@ -6,7 +6,7 @@ import type { PluggableList } from 'unified'; import type { AnyZodObject, ZodDiscriminatedUnion, ZodEffects, ZodIntersection, ZodUnion } from 'zod'; import type { MarkdownlayerCache } from './cache'; -import type { AdmonitionPluginOptions } from './remark'; +import type { AdmonitionPluginOptions, RemarkTransformLinksOptions } from './remark'; import type { SchemaContext } from './schemas/resolve'; type DocumentDefinitionSchemaWithoutEffects = @@ -131,6 +131,16 @@ export type MarkdownlayerConfigPlugins = { */ gfm?: boolean | RemarkGfmOptions; + /** + * Whether to copy files linked from a document's content (e.g. images and other files) to the output directory and rewrite the links to point at the copies. + * - `true`: Use default options + * - `false`: Disable the plugin + * - `RemarkTransformLinksOptions`: Use custom options + * + * @default true + */ + transformLinks?: boolean | RemarkTransformLinksOptions; + /** List of recma (esast, JavaScript) plugins. 
*/ recmaPlugins?: PluggableList | null | undefined; diff --git a/packages/markdownlayer/src/utils.test.ts b/packages/markdownlayer/src/utils.test.ts index f6d9ddc..9b9f268 100644 --- a/packages/markdownlayer/src/utils.test.ts +++ b/packages/markdownlayer/src/utils.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest'; -import { generateTypeName, getDataVariableName } from './utils'; +import { generateTypeName, getDataVariableName, isRelativePath } from './utils'; describe('getDataVariableName', () => { it('should return correct variable name for blog', () => { @@ -32,3 +32,34 @@ describe('generateTypeName', () => { expect(generateTypeName('comments')).toBe('Comment'); }); }); + +describe('isRelativePath', () => { + it('should return true for relative paths', () => { + expect(isRelativePath('relative/path')).toBe(true); + expect(isRelativePath('./relative/path')).toBe(true); + expect(isRelativePath('../relative/path')).toBe(true); + }); + + it('should return false for absolute URLs', () => { + expect(isRelativePath('http://example.com')).toBe(false); + expect(isRelativePath('https://example.com')).toBe(false); + expect(isRelativePath('ftp://example.com')).toBe(false); + }); + + it('should return false for absolute paths', () => { + expect(isRelativePath('/absolute/path')).toBe(false); + expect(isRelativePath('C:\\absolute\\path')).toBe(false); + }); + + it('should return false for protocol-relative URLs', () => { + expect(isRelativePath('//example.com')).toBe(false); + }); + + it('should return false for hash anchors', () => { + expect(isRelativePath('#anchor')).toBe(false); + }); + + it('should return false for query strings', () => { + expect(isRelativePath('?query=string')).toBe(false); + }); +}); diff --git a/packages/markdownlayer/src/utils.ts b/packages/markdownlayer/src/utils.ts index 7dfcce4..c5ab9dc 100644 --- a/packages/markdownlayer/src/utils.ts +++ b/packages/markdownlayer/src/utils.ts @@ -1,5 +1,9 @@ import { pluralize, singularize } 
from 'inflection'; +// https://github.com/sindresorhus/is-absolute-url/blob/main/index.js +const ABS_URL_RE = /^[a-zA-Z][a-zA-Z\d+\-.]*?:/; +const ABS_PATH_RE = /^(\/[^/\\]|[a-zA-Z]:\\)/; + export function getDataVariableName(type: string): string { return 'all' + pluralize(toPascalCase(type)); } @@ -15,3 +19,17 @@ function toPascalCase(str: string) { .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) // Capitalize the first letter of each word and join them .join(''); } + +/** + * Validates if a URL is a relative path. + * @param {string} url - The URL to validate. + * @returns {boolean} - Returns true if the URL is a relative path, otherwise false. + */ +export function isRelativePath(url: string): boolean { + if (url.startsWith('#')) return false; // ignore hash anchor + if (url.startsWith('?')) return false; // ignore query + if (url.startsWith('//')) return false; // ignore protocol-relative url + if (ABS_URL_RE.test(url)) return false; // ignore absolute url + if (ABS_PATH_RE.test(url)) return false; // ignore absolute path + return true; +}